3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
/* Forward declarations: helpers that are referenced before their
 * definitions, plus the two struct tcp_func instances used by this file.
 */
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
/* Hash the (local addr, local port, foreign addr, foreign port) tuple into
 * an index below tcp_ehash_size.  Only the last 32-bit word of each address
 * (s6_addr32[3]) takes part; it is XORed with both ports and the result is
 * folded down with two shifts before being masked.
 */
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
/* Compute the tcp_v6_hashfn() value for a socket, using its own
 * rcv_saddr/num as the local side and daddr/dport as the foreign side.
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_opt *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/* Scan every socket that owns bind bucket tb for one that conflicts
 * with sk.  The tests visible in the condition are:
 *  - the sockets are not both bound to a device, or are bound to the
 *    same device;
 *  - at least one of them lacks sk_reuse, or sk2 is listening;
 *  - ipv6_rcv_saddr_equal(sk, sk2) holds.
 * NOTE(review): the return expression is not visible here; presumably
 * non-zero means a conflicting owner was found -- confirm.
 */
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
/* Bind sk to a local port in the tcp_bhash bind table.
 *
 * The first part walks a rover through sysctl_local_port_range under
 * tcp_portalloc_lock, probing the bind hash for each candidate port
 * (presumably the "snum == 0, pick any port" case -- TODO confirm).
 * The second part looks snum up directly.  The visible checks then
 * compare sk against the bucket's current owners (fastreuse shortcut
 * or tcp_v6_bind_conflict()), create a bucket with tcp_bucket_create()
 * when none exists, and attach sk with tcp_bind_hash() unless it
 * already has a bind_hash.
 */
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 rover = tcp_port_rover;
144 if ((rover < low) || (rover > high))
146 head = &tcp_bhash[tcp_bhashfn(rover)];
147 spin_lock(&head->lock);
148 tb_for_each(tb, node, &head->chain)
149 if (tb->port == rover)
153 spin_unlock(&head->lock);
154 } while (--remaining > 0);
155 tcp_port_rover = rover;
156 spin_unlock(&tcp_portalloc_lock);
158 /* Exhausted local port range during search? */
163 /* OK, here is the one we will use. */
166 head = &tcp_bhash[tcp_bhashfn(snum)];
167 spin_lock(&head->lock);
168 tb_for_each(tb, node, &head->chain)
169 if (tb->port == snum)
175 if (tb && !hlist_empty(&tb->owners)) {
176 if (tb->fastreuse > 0 && sk->sk_reuse &&
177 sk->sk_state != TCP_LISTEN) {
181 if (tcp_v6_bind_conflict(sk, tb))
187 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
189 if (hlist_empty(&tb->owners)) {
190 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 } else if (tb->fastreuse &&
195 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
/* Attach sk to the bucket only if it is not bound to one already. */
199 if (!tcp_sk(sk)->bind_hash)
200 tcp_bind_hash(sk, tb, snum);
201 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
205 spin_unlock(&head->lock);
/* Insert an unhashed socket into the lookup tables.
 * A TCP_LISTEN socket goes into tcp_listening_hash (lock: tcp_lhash_lock);
 * any other socket goes into the tcp_ehash bucket chosen by
 * tcp_v6_sk_hashfn(), and that index is recorded in sk_hashent.
 * The protocol use count is incremented after the node is added.
 */
211 static __inline__ void __tcp_v6_hash(struct sock *sk)
213 struct hlist_head *list;
216 BUG_TRAP(sk_unhashed(sk));
218 if (sk->sk_state == TCP_LISTEN) {
219 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
220 lock = &tcp_lhash_lock;
223 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
224 list = &tcp_ehash[sk->sk_hashent].chain;
225 lock = &tcp_ehash[sk->sk_hashent].lock;
229 __sk_add_node(sk, list);
230 sock_prot_inc_use(sk->sk_prot);
/* Hash entry point for a socket; does nothing for a socket in TCP_CLOSE.
 * Sockets whose af_specific is ipv6_mapped are special-cased.
 * NOTE(review): the bodies of both branches are not visible here;
 * presumably mapped sockets use the IPv4 hash routine -- confirm.
 */
235 static void tcp_v6_hash(struct sock *sk)
237 if (sk->sk_state != TCP_CLOSE) {
238 struct tcp_opt *tp = tcp_sk(sk);
240 if (tp->af_specific == &ipv6_mapped) {
/* Find a listening PF_INET6 socket for local port hnum, holding
 * tcp_lhash_lock for reading during the walk.
 * A candidate with a specific (non-any) rcv_saddr is compared against
 * daddr, and one bound to a device is compared against dif.  A score is
 * kept per candidate and compared with the best so far (hiscore).
 * NOTE(review): the scoring increments and the handling of mismatches are
 * not visible here; presumably mismatching candidates are skipped.
 */
250 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
253 struct hlist_node *node;
254 struct sock *result = NULL;
258 read_lock(&tcp_lhash_lock);
259 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
260 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
261 struct ipv6_pinfo *np = inet6_sk(sk);
264 if (!ipv6_addr_any(&np->rcv_saddr)) {
265 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
269 if (sk->sk_bound_dev_if) {
270 if (sk->sk_bound_dev_if != dif)
278 if (score > hiscore) {
286 read_unlock(&tcp_lhash_lock);
/* Look for a non-listening socket matching the given address/port pairs.
 * The bucket index is tcp_v6_hashfn(daddr, hnum, saddr, sport), i.e.
 * daddr/hnum are used as the local side and saddr/sport as the foreign
 * side.  The bucket's own lock is taken for reading here.  The
 * established chain is searched with TCP_IPV6_MATCH first, then the
 * TIME_WAIT chain kept at head + tcp_ehash_size, where ports, family,
 * both addresses and the bound device are compared by hand.
 */
290 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
291 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
293 * The sockhash lock must be held as a reader here.
296 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
297 struct in6_addr *daddr, u16 hnum,
300 struct tcp_ehash_bucket *head;
302 struct hlist_node *node;
303 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
306 /* Optimize here for direct hit, only listening connections can
307 * have wildcards anyways.
309 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
310 head = &tcp_ehash[hash];
311 read_lock(&head->lock);
312 sk_for_each(sk, node, &head->chain) {
313 /* For IPV6 do the cheaper port and family tests first. */
314 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
315 goto hit; /* You sunk my battleship! */
317 /* Must check for a TIME_WAIT'er before going to listener hash. */
318 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
319 /* FIXME: acme: check this... */
320 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
322 if(*((__u32 *)&(tw->tw_dport)) == ports &&
323 sk->sk_family == PF_INET6) {
324 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
325 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
326 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 read_unlock(&head->lock);
335 read_unlock(&head->lock);
/* Combined lookup: try __tcp_v6_lookup_established() first and fall back
 * to tcp_v6_lookup_listener() for the local address/port.
 * NOTE(review): the test between the two calls is not visible here;
 * presumably a hit from the first lookup is returned directly.
 */
340 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* Non-static wrapper around __tcp_v6_lookup().  dport is passed through
 * ntohs() before the call; sport is passed unchanged.
 */
354 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
369 * Open request hash tables.
/* Hash a remote address into a SYN queue slot.  The four 32-bit words
 * of raddr are fed through two rounds of __jhash_mix() and the result
 * is masked with (TCP_SYNQ_HSIZE - 1).
 * NOTE(review): the lines that fold in rport and rnd are not visible
 * here -- confirm how they enter the mix.
 */
372 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
376 a = raddr->s6_addr32[0];
377 b = raddr->s6_addr32[1];
378 c = raddr->s6_addr32[2];
380 a += JHASH_GOLDEN_RATIO;
381 b += JHASH_GOLDEN_RATIO;
383 __jhash_mix(a, b, c);
385 a += raddr->s6_addr32[3];
387 __jhash_mix(a, b, c);
389 return c & (TCP_SYNQ_HSIZE - 1);
/* Search the listener's SYN table for a pending open_request.
 * The chain is selected with tcp_v6_synq_hash(raddr, rport, hash_rnd).
 * A request matches when its remote port, AF_INET6 family, remote and
 * local addresses agree and its recorded iif is either zero or equal to
 * iif.  prev always points at the link that references the current
 * request, which lets a caller unlink it.  A matching request is
 * expected to have no socket attached yet (BUG_TRAP).
 */
392 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
393 struct open_request ***prevp,
395 struct in6_addr *raddr,
396 struct in6_addr *laddr,
399 struct tcp_listen_opt *lopt = tp->listen_opt;
400 struct open_request *req, **prev;
402 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
403 (req = *prev) != NULL;
404 prev = &req->dl_next) {
405 if (req->rmt_port == rport &&
406 req->class->family == AF_INET6 &&
407 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
408 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
409 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
410 BUG_TRAP(req->sk == NULL);
/* Finish a TCP checksum over the IPv6 pseudo-header: a thin wrapper
 * that calls csum_ipv6_magic() with IPPROTO_TCP and the partial sum
 * passed in as base.  The th argument is not used by the visible call.
 */
419 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
420 struct in6_addr *saddr,
421 struct in6_addr *daddr,
424 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Choose an initial sequence number from the addresses in skb.
 * An ETH_P_IPV6 packet uses secure_tcpv6_sequence_number() on the IPv6
 * header addresses; the other visible branch uses
 * secure_tcp_sequence_number() on the IPv4 header addresses.
 */
427 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
429 if (skb->protocol == htons(ETH_P_IPV6)) {
430 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
431 skb->nh.ipv6h->saddr.s6_addr32,
435 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Check that sk's address/port 4-tuple is not already in use and, if it
 * is free, insert sk into the established hash.
 *
 * The locals follow the lookup functions' naming: daddr is the socket's
 * own rcv_saddr and saddr is the peer address (np->daddr).
 * With the bucket write-locked (BH disabled), the TIME_WAIT chain at
 * head + tcp_ehash_size is scanned first.  For a matching TIME_WAIT
 * entry that has tw_ts_recent_stamp set, write_seq and the ts_recent
 * fields are seeded from it.  The established chain is then scanned with
 * TCP_IPV6_MATCH.  On the success path sk is added to the chain and
 * sk_hashent is set; a recycled TIME_WAIT bucket is descheduled and
 * counted.  The visible failure path returns -EADDRNOTAVAIL.
 * NOTE(review): branch targets and the success return value are not
 * visible here.
 */
442 static int tcp_v6_check_established(struct sock *sk)
444 struct inet_opt *inet = inet_sk(sk);
445 struct ipv6_pinfo *np = inet6_sk(sk);
446 struct in6_addr *daddr = &np->rcv_saddr;
447 struct in6_addr *saddr = &np->daddr;
448 int dif = sk->sk_bound_dev_if;
449 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
450 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
451 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
453 struct hlist_node *node;
454 struct tcp_tw_bucket *tw;
456 write_lock_bh(&head->lock);
458 /* Check TIME-WAIT sockets first. */
459 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
460 tw = (struct tcp_tw_bucket*)sk2;
462 if(*((__u32 *)&(tw->tw_dport)) == ports &&
463 sk2->sk_family == PF_INET6 &&
464 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
465 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
466 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
467 struct tcp_opt *tp = tcp_sk(sk);
469 if (tw->tw_ts_recent_stamp) {
470 /* See comment in tcp_ipv4.c */
471 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
474 tp->ts_recent = tw->tw_ts_recent;
475 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
484 /* And established part... */
485 sk_for_each(sk2, node, &head->chain) {
486 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
491 BUG_TRAP(sk_unhashed(sk));
492 __sk_add_node(sk, &head->chain);
493 sk->sk_hashent = hash;
494 sock_prot_inc_use(sk->sk_prot);
495 write_unlock_bh(&head->lock);
498 /* Silly. Should hash-dance instead... */
500 tcp_tw_deschedule(tw);
501 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
509 write_unlock_bh(&head->lock);
510 return -EADDRNOTAVAIL;
/* Hash a connecting socket.
 * If the socket has no local port yet (inet->num == 0) one is obtained
 * through tcp_v6_get_port() and sport is set from it.  Then, under the
 * bind bucket lock, the code tests whether sk is the only owner of its
 * bind bucket (first in tb->owners with no next bind node).  The other
 * visible path releases the lock and defers to
 * tcp_v6_check_established() for the uniqueness check.
 * NOTE(review): the sole-owner branch body is not visible here;
 * presumably it hashes sk directly -- confirm.
 */
513 static int tcp_v6_hash_connect(struct sock *sk)
515 struct tcp_bind_hashbucket *head;
516 struct tcp_bind_bucket *tb;
519 if (inet_sk(sk)->num == 0) {
520 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
523 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
526 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
529 spin_lock_bh(&head->lock);
531 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
533 spin_unlock_bh(&head->lock);
536 spin_unlock_bh(&head->lock);
537 return tcp_v6_check_established(sk);
/* Return the iif field stored in the skb's IPv6 control block (IP6CB). */
541 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
543 return IP6CB(skb)->iif;
/* connect() implementation for TCP over IPv6.
 *
 * Steps visible in this function:
 *  - reject a too-short address or a family other than AF_INET6
 *    (the latter with -EAFNOSUPPORT);
 *  - when a flow label is supplied, look it up with fl6_sock_lookup()
 *    and overwrite the destination address with the label's dst;
 *  - turn an unspecified destination into loopback by setting the last
 *    address byte to 1;
 *  - test for a multicast destination;
 *  - for a link-local destination, take the interface from
 *    sin6_scope_id when present (it is compared with any interface the
 *    socket is already bound to) and require that an interface is set;
 *  - clear ts_recent_stamp when the destination differs from the
 *    previously stored np->daddr;
 *  - for a v4-mapped destination, switch the socket to ipv6_mapped and
 *    tcp_v4_do_rcv, call tcp_v4_connect(), and (presumably only on
 *    failure -- TODO confirm) restore the IPv6 operations;
 *  - otherwise build a flow, optionally redirect it to the first hop of
 *    a source route, resolve it with ip6_dst_lookup() and xfrm_lookup(),
 *    store addresses, route capabilities, extension header lengths and
 *    mss_clamp, enter TCP_SYN_SENT, hash with tcp_v6_hash_connect(),
 *    choose write_seq and call tcp_connect().
 * On the visible late-failure path the state goes back to TCP_CLOSE and
 * sk_route_caps is cleared.
 */
546 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
549 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
550 struct inet_opt *inet = inet_sk(sk);
551 struct ipv6_pinfo *np = inet6_sk(sk);
552 struct tcp_opt *tp = tcp_sk(sk);
553 struct in6_addr *saddr = NULL, *final_p = NULL, final;
555 struct dst_entry *dst;
559 if (addr_len < SIN6_LEN_RFC2133)
562 if (usin->sin6_family != AF_INET6)
563 return(-EAFNOSUPPORT);
565 memset(&fl, 0, sizeof(fl));
568 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
569 IP6_ECN_flow_init(fl.fl6_flowlabel);
570 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
571 struct ip6_flowlabel *flowlabel;
572 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
573 if (flowlabel == NULL)
575 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
576 fl6_sock_release(flowlabel);
581 * connect() to INADDR_ANY means loopback (BSD'ism).
584 if(ipv6_addr_any(&usin->sin6_addr))
585 usin->sin6_addr.s6_addr[15] = 0x1;
587 addr_type = ipv6_addr_type(&usin->sin6_addr);
589 if(addr_type & IPV6_ADDR_MULTICAST)
592 if (addr_type&IPV6_ADDR_LINKLOCAL) {
593 if (addr_len >= sizeof(struct sockaddr_in6) &&
594 usin->sin6_scope_id) {
595 /* If interface is set while binding, indices
598 if (sk->sk_bound_dev_if &&
599 sk->sk_bound_dev_if != usin->sin6_scope_id)
602 sk->sk_bound_dev_if = usin->sin6_scope_id;
605 /* Connect to link-local address requires an interface */
606 if (!sk->sk_bound_dev_if)
610 if (tp->ts_recent_stamp &&
611 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
613 tp->ts_recent_stamp = 0;
617 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
618 np->flow_label = fl.fl6_flowlabel;
624 if (addr_type == IPV6_ADDR_MAPPED) {
625 u32 exthdrlen = tp->ext_header_len;
626 struct sockaddr_in sin;
628 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
630 if (__ipv6_only_sock(sk))
633 sin.sin_family = AF_INET;
634 sin.sin_port = usin->sin6_port;
635 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
637 tp->af_specific = &ipv6_mapped;
638 sk->sk_backlog_rcv = tcp_v4_do_rcv;
640 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
643 tp->ext_header_len = exthdrlen;
644 tp->af_specific = &ipv6_specific;
645 sk->sk_backlog_rcv = tcp_v6_do_rcv;
648 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
650 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
657 if (!ipv6_addr_any(&np->rcv_saddr))
658 saddr = &np->rcv_saddr;
660 fl.proto = IPPROTO_TCP;
661 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
662 ipv6_addr_copy(&fl.fl6_src,
663 (saddr ? saddr : &np->saddr));
664 fl.oif = sk->sk_bound_dev_if;
665 fl.fl_ip_dport = usin->sin6_port;
666 fl.fl_ip_sport = inet->sport;
668 if (np->opt && np->opt->srcrt) {
669 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
670 ipv6_addr_copy(&final, &fl.fl6_dst);
671 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
675 err = ip6_dst_lookup(sk, &dst, &fl);
679 ipv6_addr_copy(&fl.fl6_dst, final_p);
681 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
688 ipv6_addr_copy(&np->rcv_saddr, saddr);
691 /* set the source address */
692 ipv6_addr_copy(&np->saddr, saddr);
693 inet->rcv_saddr = LOOPBACK4_IPV6;
695 ip6_dst_store(sk, dst, NULL);
696 sk->sk_route_caps = dst->dev->features &
697 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
699 tp->ext_header_len = 0;
701 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
702 tp->ext2_header_len = dst->header_len;
704 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
706 inet->dport = usin->sin6_port;
708 tcp_set_state(sk, TCP_SYN_SENT);
709 err = tcp_v6_hash_connect(sk);
714 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
719 err = tcp_connect(sk);
726 tcp_set_state(sk, TCP_CLOSE);
730 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.
 *
 * skb->data points at the IPv6 header of the offending packet and
 * offset locates its TCP header.  The owning socket is looked up with
 * the addresses and ports of that packet.  Visible handling:
 *  - a TIME_WAIT hit only drops the reference taken by the lookup;
 *  - LOCKDROPPEDICMPS is counted when the socket is owned by the user;
 *  - for non-listening sockets the quoted sequence number must lie
 *    between snd_una and snd_nxt, otherwise OUTOFWINDOWICMPS is counted;
 *  - ICMPV6_PKT_TOOBIG re-validates or re-resolves the route and, when
 *    the cached pmtu_cookie exceeds the route's PMTU, calls
 *    tcp_sync_mss() and tcp_simple_retransmit();
 *  - other errors are converted with icmpv6_err_convert() and then
 *    dispatched on the socket state: a pending open_request whose
 *    snt_isn matches seq is dropped from the SYN queue; in the
 *    TCP_SYN_RECV case and in the final branch the error is reported
 *    via sk_error_report() or stored in sk_err_soft.
 * NOTE(review): several case labels and branch bodies are not visible
 * here, so the exact state-to-action mapping should be confirmed.
 */
734 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
735 int type, int code, int offset, __u32 info)
737 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
738 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
739 struct ipv6_pinfo *np;
745 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
748 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
752 if (sk->sk_state == TCP_TIME_WAIT) {
753 tcp_tw_put((struct tcp_tw_bucket*)sk);
758 if (sock_owned_by_user(sk))
759 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
761 if (sk->sk_state == TCP_CLOSE)
765 seq = ntohl(th->seq);
766 if (sk->sk_state != TCP_LISTEN &&
767 !between(seq, tp->snd_una, tp->snd_nxt)) {
768 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
774 if (type == ICMPV6_PKT_TOOBIG) {
775 struct dst_entry *dst = NULL;
777 if (sock_owned_by_user(sk))
779 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
782 /* icmp should have updated the destination cache entry */
783 dst = __sk_dst_check(sk, np->dst_cookie);
786 struct inet_opt *inet = inet_sk(sk);
789 /* BUGGG_FUTURE: Again, it is not clear how
790 to handle rthdr case. Ignore this complexity
793 memset(&fl, 0, sizeof(fl));
794 fl.proto = IPPROTO_TCP;
795 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
796 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
797 fl.oif = sk->sk_bound_dev_if;
798 fl.fl_ip_dport = inet->dport;
799 fl.fl_ip_sport = inet->sport;
801 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
802 sk->sk_err_soft = -err;
806 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
807 sk->sk_err_soft = -err;
814 if (tp->pmtu_cookie > dst_pmtu(dst)) {
815 tcp_sync_mss(sk, dst_pmtu(dst));
816 tcp_simple_retransmit(sk);
817 } /* else let the usual retransmit timer handle it */
822 icmpv6_err_convert(type, code, &err);
824 /* Might be for an open_request */
825 switch (sk->sk_state) {
826 struct open_request *req, **prev;
828 if (sock_owned_by_user(sk))
831 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
832 &hdr->saddr, tcp_v6_iif(skb));
836 /* ICMPs are not backlogged, hence we cannot get
837 * an established socket here.
839 BUG_TRAP(req->sk == NULL);
841 if (seq != req->snt_isn) {
842 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
846 tcp_synq_drop(sk, req, prev);
850 case TCP_SYN_RECV: /* Cannot happen.
851 It can, if SYNs are crossed. --ANK */
852 if (!sock_owned_by_user(sk)) {
853 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
855 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
859 sk->sk_err_soft = err;
863 if (!sock_owned_by_user(sk) && np->recverr) {
865 sk->sk_error_report(sk);
867 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for the pending request req.
 *
 * A flow is built from the request's remote/local addresses, its iif
 * and ports.  When the socket asked for routing-header inversion
 * (rxopt.bits.srcrt == 2) and the SYN's packet options were kept, a
 * reversed routing header is produced with ipv6_invert_rthdr().  If the
 * options carry a source route, the flow destination is temporarily
 * replaced by the first hop.  After ip6_dst_lookup() and xfrm_lookup(),
 * tcp_make_synack() creates the segment, its checksum is filled in over
 * the request's addresses, and it is sent with ip6_xmit().
 * Option memory that is not the listener's own np->opt is released
 * before returning.
 * NOTE(review): the conditions guarding the route lookup and the
 * NET_XMIT_CN handling are not visible here.
 */
875 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
876 struct dst_entry *dst)
878 struct ipv6_pinfo *np = inet6_sk(sk);
879 struct sk_buff * skb;
880 struct ipv6_txoptions *opt = NULL;
881 struct in6_addr * final_p = NULL, final;
885 memset(&fl, 0, sizeof(fl));
886 fl.proto = IPPROTO_TCP;
887 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
888 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
889 fl.fl6_flowlabel = 0;
890 fl.oif = req->af.v6_req.iif;
891 fl.fl_ip_dport = req->rmt_port;
892 fl.fl_ip_sport = inet_sk(sk)->sport;
897 np->rxopt.bits.srcrt == 2 &&
898 req->af.v6_req.pktopts) {
899 struct sk_buff *pktopts = req->af.v6_req.pktopts;
900 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
902 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
905 if (opt && opt->srcrt) {
906 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
907 ipv6_addr_copy(&final, &fl.fl6_dst);
908 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
912 err = ip6_dst_lookup(sk, &dst, &fl);
916 ipv6_addr_copy(&fl.fl6_dst, final_p);
917 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
921 skb = tcp_make_synack(sk, dst, req);
923 struct tcphdr *th = skb->h.th;
925 th->check = tcp_v6_check(th, skb->len,
926 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
927 csum_partial((char *)th, skb->len, skb->csum));
929 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
930 err = ip6_xmit(sk, skb, &fl, opt, 0);
931 if (err == NET_XMIT_CN)
937 if (opt && opt != np->opt)
938 sock_kfree_s(sk, opt, opt->tot_len);
/* Open-request destructor: release the packet-options skb that was
 * kept with the request, if there is one.
 */
942 static void tcp_v6_or_free(struct open_request *req)
944 if (req->af.v6_req.pktopts)
945 kfree_skb(req->af.v6_req.pktopts);
/* Operation table attached to IPv6 open requests (assigned to
 * req->class in tcp_v6_conn_request): SYN-ACK retransmission, ACK and
 * reset transmission, and the request destructor.
 */
948 static struct or_calltable or_ipv6 = {
950 .rtx_syn_ack = tcp_v6_send_synack,
951 .send_ack = tcp_v6_or_send_ack,
952 .destructor = tcp_v6_or_free,
953 .send_reset = tcp_v6_send_reset
/* Test whether skb carries any IPv6 ancillary data that the socket has
 * asked to receive.  The condition pairs each item found in the packet
 * (hop-by-hop options, non-zero flow info, routing header, destination
 * options) with the corresponding rxopt bit of the socket.
 * NOTE(review): the statements that set the return value are not
 * visible here; presumably non-zero means "keep the options".
 */
956 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
958 struct ipv6_pinfo *np = inet6_sk(sk);
959 struct inet6_skb_parm *opt = IP6CB(skb);
962 if ((opt->hop && np->rxopt.bits.hopopts) ||
963 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
964 np->rxopt.bits.rxflow) ||
965 (opt->srcrt && np->rxopt.bits.srcrt) ||
966 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/* Fill in the TCP checksum of an outgoing segment.
 * With CHECKSUM_HW only the complemented pseudo-header sum is stored in
 * th->check, and skb->csum is set to the offset of the check field
 * within the TCP header.  Otherwise the checksum is completed in
 * software, starting from csum_partial() over the TCP header
 * (th->doff << 2 bytes).
 */
973 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
976 struct ipv6_pinfo *np = inet6_sk(sk);
978 if (skb->ip_summed == CHECKSUM_HW) {
979 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
980 skb->csum = offsetof(struct tcphdr, check);
982 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
983 csum_partial((char *)th, th->doff<<2,
/* Send a TCP RST in reply to skb without using a socket.
 *
 * The destination of skb is tested with ipv6_unicast_destination()
 * first.  A fresh buffer is allocated with headroom for the link,
 * IPv6 and TCP headers.  The reply header swaps the ports of the
 * incoming segment; the visible assignments take seq from the incoming
 * ack_seq, or build ack_seq from the incoming seq plus the SYN and FIN
 * flags and the payload length.  The flow uses the incoming packet's
 * addresses swapped and the incoming interface as oif.  The segment is
 * routed with ip6_dst_lookup() and xfrm_lookup(), sent with ip6_xmit(),
 * and counted in OUTSEGS and OUTRSTS.
 * NOTE(review): the flag tests that choose between the seq and ack_seq
 * forms are not visible here.
 */
989 static void tcp_v6_send_reset(struct sk_buff *skb)
991 struct tcphdr *th = skb->h.th, *t1;
992 struct sk_buff *buff;
998 if (!ipv6_unicast_destination(skb))
1002 * We need to grab some memory, and put together an RST,
1003 * and then put it into the queue to be sent.
1006 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1011 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1013 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1015 /* Swap the send and the receive. */
1016 memset(t1, 0, sizeof(*t1));
1017 t1->dest = th->source;
1018 t1->source = th->dest;
1019 t1->doff = sizeof(*t1)/4;
1023 t1->seq = th->ack_seq;
1026 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1027 + skb->len - (th->doff<<2));
1030 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1032 memset(&fl, 0, sizeof(fl));
1033 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1034 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1036 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1037 sizeof(*t1), IPPROTO_TCP,
1040 fl.proto = IPPROTO_TCP;
1041 fl.oif = tcp_v6_iif(skb);
1042 fl.fl_ip_dport = t1->dest;
1043 fl.fl_ip_sport = t1->source;
1045 /* sk = NULL, but it is safe for now. RST socket required. */
1046 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1048 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1049 dst_release(buff->dst);
1053 ip6_xmit(NULL, buff, &fl, NULL, 0);
1054 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1055 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Send a bare ACK in reply to skb without using a socket.
 *
 * seq, ack and win are given in host byte order and converted with
 * htonl()/htons() when written into the header.  The reply swaps the
 * ports of the incoming segment.  The visible option code writes a
 * NOP, NOP, TIMESTAMP option header followed by tcp_time_stamp.  The
 * flow uses the incoming packet's addresses swapped and the incoming
 * interface as oif; the segment is routed with ip6_dst_lookup() and
 * xfrm_lookup(), sent with ip6_xmit(), and counted in OUTSEGS.
 * NOTE(review): the test on ts, the adjustment of tot_len and the
 * write of the echoed timestamp are not visible here.
 */
1062 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1064 struct tcphdr *th = skb->h.th, *t1;
1065 struct sk_buff *buff;
1067 int tot_len = sizeof(struct tcphdr);
1072 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1077 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1079 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1081 /* Swap the send and the receive. */
1082 memset(t1, 0, sizeof(*t1));
1083 t1->dest = th->source;
1084 t1->source = th->dest;
1085 t1->doff = tot_len/4;
1086 t1->seq = htonl(seq);
1087 t1->ack_seq = htonl(ack);
1089 t1->window = htons(win);
1092 u32 *ptr = (u32*)(t1 + 1);
1093 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1094 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1095 *ptr++ = htonl(tcp_time_stamp);
1099 buff->csum = csum_partial((char *)t1, tot_len, 0);
1101 memset(&fl, 0, sizeof(fl));
1102 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1103 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1105 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1106 tot_len, IPPROTO_TCP,
1109 fl.proto = IPPROTO_TCP;
1110 fl.oif = tcp_v6_iif(skb);
1111 fl.fl_ip_dport = t1->dest;
1112 fl.fl_ip_sport = t1->source;
1114 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1115 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1116 dst_release(buff->dst);
1119 ip6_xmit(NULL, buff, &fl, NULL, 0);
1120 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK a segment on behalf of a TIME_WAIT bucket: sk is reinterpreted as
 * a struct tcp_tw_bucket and its snd_nxt, rcv_nxt, scaled-down receive
 * window and ts_recent are passed to tcp_v6_send_ack().
 */
1127 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1129 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1131 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1132 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* ACK a segment on behalf of a pending open request, using the
 * request's snt_isn + 1 and rcv_isn + 1 as sequence numbers together
 * with its receive window and ts_recent.
 */
1137 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1139 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* Decide which socket should process a segment that arrived on
 * listener sk.
 * First the SYN table is searched for a matching open request, which
 * is handed to tcp_check_req().  Next the established/TIME_WAIT tables
 * are searched for the same connection; a TIME_WAIT hit only has its
 * reference dropped.  The syncookie check is compiled out with #if 0.
 * NOTE(review): the returns for the established hit and for the
 * fall-through case are not visible here.
 */
1143 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1145 struct open_request *req, **prev;
1146 struct tcphdr *th = skb->h.th;
1147 struct tcp_opt *tp = tcp_sk(sk);
1150 /* Find possible connection requests. */
1151 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1152 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1154 return tcp_check_req(sk, skb, req, prev);
1156 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1158 &skb->nh.ipv6h->daddr,
1163 if (nsk->sk_state != TCP_TIME_WAIT) {
1167 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1171 #if 0 /*def CONFIG_SYN_COOKIES*/
1172 if (!th->rst && !th->syn && th->ack)
1173 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* Add an open request to the listener's SYN table.
 * The slot is chosen with tcp_v6_synq_hash() and the request expires
 * TCP_TIMEOUT_INIT jiffies from now.  The request is linked in front
 * of the current chain head; only the store that publishes it in
 * syn_table[h] is done under the syn_wait_lock write lock.
 */
1178 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1180 struct tcp_opt *tp = tcp_sk(sk);
1181 struct tcp_listen_opt *lopt = tp->listen_opt;
1182 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1185 req->expires = jiffies + TCP_TIMEOUT_INIT;
1187 req->dl_next = lopt->syn_table[h];
1189 write_lock(&tp->syn_wait_lock);
1190 lopt->syn_table[h] = req;
1191 write_unlock(&tp->syn_wait_lock);
/* Handle an incoming SYN on a listening socket.
 *
 * IPv4 packets are passed to tcp_v4_conn_request().  The destination
 * is tested with ipv6_unicast_destination().  A full SYN queue (with
 * no preset isn) is logged, rate limited; the accept queue is also
 * checked together with the count of young requests.  An open request
 * is then allocated and initialised from the parsed TCP options, the
 * packet's addresses and ECN state.  The SYN skb itself is retained
 * (its user count is incremented) when the socket wants any of its
 * ancillary data.  The request's iif is the socket's bound device, or
 * the incoming interface for a link-local peer on an unbound socket.
 * Finally a SYN-ACK is sent and the request is queued.
 * The visible drop path frees the request, counts ATTEMPTFAILS and
 * returns 0 so that no reset is sent.
 */
1197 /* FIXME: this is substantially similar to the ipv4 code.
1198 * Can some kind of merge be done? -- erics
1200 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1202 struct ipv6_pinfo *np = inet6_sk(sk);
1203 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1204 struct open_request *req = NULL;
1205 __u32 isn = TCP_SKB_CB(skb)->when;
1207 if (skb->protocol == htons(ETH_P_IP))
1208 return tcp_v4_conn_request(sk, skb);
1210 if (!ipv6_unicast_destination(skb))
1214 * There are no SYN attacks on IPv6, yet...
1216 if (tcp_synq_is_full(sk) && !isn) {
1217 if (net_ratelimit())
1218 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1222 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1225 req = tcp_openreq_alloc();
1229 tcp_clear_options(&tmptp);
1230 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1231 tmptp.user_mss = tp->user_mss;
1233 tcp_parse_options(skb, &tmptp, 0);
1235 tmptp.tstamp_ok = tmptp.saw_tstamp;
1236 tcp_openreq_init(req, &tmptp, skb);
1238 req->class = &or_ipv6;
1239 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1240 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1241 TCP_ECN_create_request(req, skb->h.th);
1242 req->af.v6_req.pktopts = NULL;
1243 if (ipv6_opt_accepted(sk, skb) ||
1244 np->rxopt.bits.rxinfo ||
1245 np->rxopt.bits.rxhlim) {
1246 atomic_inc(&skb->users);
1247 req->af.v6_req.pktopts = skb;
1249 req->af.v6_req.iif = sk->sk_bound_dev_if;
1251 /* So that link locals have meaning */
1252 if (!sk->sk_bound_dev_if &&
1253 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1254 req->af.v6_req.iif = tcp_v6_iif(skb);
1257 isn = tcp_v6_init_sequence(sk,skb);
1261 if (tcp_v6_send_synack(sk, req, NULL))
1264 tcp_v6_synq_add(sk, req);
1270 tcp_openreq_free(req);
1272 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1273 return 0; /* don't send reset */
/* Create the child socket once the handshake for req has completed.
 *
 * IPv4 packet on an IPv6 listener: the child is created by
 * tcp_v4_syn_recv_sock().  Its pinet6 pointer is set, the listener's
 * ipv6_pinfo is copied, daddr/saddr/rcv_saddr are set to v4-mapped
 * form, and the child is switched to ipv6_mapped with tcp_v4_do_rcv as
 * backlog handler.  pktoptions is cleared, mcast_oif/mcast_hops are
 * taken from skb, and the MSS is re-synchronised.
 *
 * Native IPv6: the accept queue is checked, a routing header may be
 * inverted from the saved SYN options, and a flow is built and routed
 * (ip6_dst_lookup + xfrm_lookup).  The child then comes from
 * tcp_create_openreq_child().  It gets the route and route
 * capabilities, a copy of the listener's ipv6_pinfo, the request's
 * addresses and iif, a clone of the SYN's packet options (owned by the
 * child), a duplicate of the transmit options, extension header
 * lengths and MSS values, and is finally hashed and made to inherit
 * the listener's port.
 *
 * The visible failure path counts LISTENOVERFLOWS and LISTENDROPS and
 * frees option memory that is not the listener's own np->opt.
 * NOTE(review): the guard around the route lookup and the labels of
 * the failure path are not visible here.
 */
1276 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1277 struct open_request *req,
1278 struct dst_entry *dst)
1280 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1281 struct tcp6_sock *newtcp6sk;
1282 struct inet_opt *newinet;
1283 struct tcp_opt *newtp;
1285 struct ipv6_txoptions *opt;
1287 if (skb->protocol == htons(ETH_P_IP)) {
1292 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1297 newtcp6sk = (struct tcp6_sock *)newsk;
1298 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1300 newinet = inet_sk(newsk);
1301 newnp = inet6_sk(newsk);
1302 newtp = tcp_sk(newsk);
1304 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1306 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1309 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1312 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1314 newtp->af_specific = &ipv6_mapped;
1315 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1316 newnp->pktoptions = NULL;
1318 newnp->mcast_oif = tcp_v6_iif(skb);
1319 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1321 /* Charge newly allocated IPv6 socket. Though it is mapped,
1324 #ifdef INET_REFCNT_DEBUG
1325 atomic_inc(&inet6_sock_nr);
1328 /* It is tricky place. Until this moment IPv4 tcp
1329 worked with IPv6 af_tcp.af_specific.
1332 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1339 if (sk_acceptq_is_full(sk))
1342 if (np->rxopt.bits.srcrt == 2 &&
1343 opt == NULL && req->af.v6_req.pktopts) {
1344 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1346 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1350 struct in6_addr *final_p = NULL, final;
1353 memset(&fl, 0, sizeof(fl));
1354 fl.proto = IPPROTO_TCP;
1355 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1356 if (opt && opt->srcrt) {
1357 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1358 ipv6_addr_copy(&final, &fl.fl6_dst);
1359 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1362 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1363 fl.oif = sk->sk_bound_dev_if;
1364 fl.fl_ip_dport = req->rmt_port;
1365 fl.fl_ip_sport = inet_sk(sk)->sport;
1367 if (ip6_dst_lookup(sk, &dst, &fl))
1371 ipv6_addr_copy(&fl.fl6_dst, final_p);
1373 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1377 newsk = tcp_create_openreq_child(sk, req, skb);
1381 /* Charge newly allocated IPv6 socket */
1382 #ifdef INET_REFCNT_DEBUG
1383 atomic_inc(&inet6_sock_nr);
1386 ip6_dst_store(newsk, dst, NULL);
1387 newsk->sk_route_caps = dst->dev->features &
1388 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1390 newtcp6sk = (struct tcp6_sock *)newsk;
1391 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1393 newtp = tcp_sk(newsk);
1394 newinet = inet_sk(newsk);
1395 newnp = inet6_sk(newsk);
1397 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1399 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1400 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1401 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1402 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1404 /* Now IPv6 options...
1406 First: no IPv4 options.
1408 newinet->opt = NULL;
1411 newnp->rxopt.all = np->rxopt.all;
1413 /* Clone pktoptions received with SYN */
1414 newnp->pktoptions = NULL;
1415 if (req->af.v6_req.pktopts) {
1416 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1418 kfree_skb(req->af.v6_req.pktopts);
1419 req->af.v6_req.pktopts = NULL;
1420 if (newnp->pktoptions)
1421 skb_set_owner_r(newnp->pktoptions, newsk);
1424 newnp->mcast_oif = tcp_v6_iif(skb);
1425 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1427 /* Clone native IPv6 options from listening socket (if any)
1429 Yes, keeping reference count would be much more clever,
1430 but we make one more one thing there: reattach optmem
1434 newnp->opt = ipv6_dup_options(newsk, opt);
1436 sock_kfree_s(sk, opt, opt->tot_len);
1439 newtp->ext_header_len = 0;
1441 newtp->ext_header_len = newnp->opt->opt_nflen +
1442 newnp->opt->opt_flen;
1443 newtp->ext2_header_len = dst->header_len;
1445 tcp_sync_mss(newsk, dst_pmtu(dst));
1446 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1447 tcp_initialize_rcv_mss(newsk);
1449 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1451 __tcp_v6_hash(newsk);
1452 tcp_inherit_port(sk, newsk);
1457 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1459 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1460 if (opt && opt != np->opt)
1461 sock_kfree_s(sk, opt, opt->tot_len);
// Prepare receive-side checksum state for an incoming TCP/IPv6 segment.
// With CHECKSUM_HW the segment is marked CHECKSUM_UNNECESSARY and the
// device-supplied sum in skb->csum is checked with tcp_v6_check(); a
// non-zero result falls through to the "hw tcp v6 csum failed" message.
// Short segments (skb->len <= 76) are verified in full right away with
// skb_checksum().  For anything else the complemented pseudo-header sum
// is stored in skb->csum.
// NOTE(review): the return statements are not visible here; presumably
// the stored pseudo-header sum is completed later by the checksum
// helpers -- confirm.
1466 static int tcp_v6_checksum_init(struct sk_buff *skb)
1468 if (skb->ip_summed == CHECKSUM_HW) {
1469 skb->ip_summed = CHECKSUM_UNNECESSARY;
1470 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1471 &skb->nh.ipv6h->daddr,skb->csum))
1473 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1475 if (skb->len <= 76) {
1476 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1477 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1479 skb->ip_summed = CHECKSUM_UNNECESSARY;
1481 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1482 &skb->nh.ipv6h->daddr,0);
1487 /* The socket must have it's spinlock held when we get
1490 * We have a potential double-lock case here, so even when
1491 * doing backlog processing we use the BH locking scheme.
1492 * This is because we cannot sleep with the original spinlock
1495 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1497 struct ipv6_pinfo *np = inet6_sk(sk);
1499 struct sk_buff *opt_skb = NULL;
1501 /* Imagine: socket is IPv6. IPv4 packet arrives,
1502 goes to IPv4 receive handler and backlogged.
1503 From backlog it always goes here. Kerboom...
1504 Fortunately, tcp_rcv_established and rcv_established
1505 handle them correctly, but it is not case with
1506 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1509 if (skb->protocol == htons(ETH_P_IP))
1510 return tcp_v4_do_rcv(sk, skb);
1512 if (sk_filter(sk, skb, 0))
1516 * socket locking is here for SMP purposes as backlog rcv
1517 * is currently called with bh processing disabled.
1520 /* Do Stevens' IPV6_PKTOPTIONS.
1522 Yes, guys, it is the only place in our code, where we
1523 may make it not affecting IPv4.
1524 The rest of code is protocol independent,
1525 and I do not like idea to uglify IPv4.
1527 Actually, all the idea behind IPV6_PKTOPTIONS
1528 looks not very well thought. For now we latch
1529 options, received in the last packet, enqueued
1530 by tcp. Feel free to propose better solution.
1534 opt_skb = skb_clone(skb, GFP_ATOMIC);
1536 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1537 TCP_CHECK_TIMER(sk);
1538 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1540 TCP_CHECK_TIMER(sk);
1542 goto ipv6_pktoptions;
1546 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1549 if (sk->sk_state == TCP_LISTEN) {
1550 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1555 * Queue it on the new socket if the new socket is active,
1556 * otherwise we just shortcircuit this and continue with
1560 if (tcp_child_process(sk, nsk, skb))
1563 __kfree_skb(opt_skb);
1568 TCP_CHECK_TIMER(sk);
1569 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1571 TCP_CHECK_TIMER(sk);
1573 goto ipv6_pktoptions;
1577 tcp_v6_send_reset(skb);
1580 __kfree_skb(opt_skb);
1584 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1589 /* Do you ask, what is it?
1591 1. skb was enqueued by tcp.
1592 2. skb is added to tail of read queue, rather than out of order.
1593 3. socket is not in passive state.
1594 4. Finally, it really contains options, which user wants to receive.
1597 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1598 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1599 if (np->rxopt.bits.rxinfo)
1600 np->mcast_oif = tcp_v6_iif(opt_skb);
1601 if (np->rxopt.bits.rxhlim)
1602 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1603 if (ipv6_opt_accepted(sk, opt_skb)) {
1604 skb_set_owner_r(opt_skb, sk);
1605 opt_skb = xchg(&np->pktoptions, opt_skb);
1607 __kfree_skb(opt_skb);
1608 opt_skb = xchg(&np->pktoptions, NULL);
1617 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1619 struct sk_buff *skb = *pskb;
1624 if (skb->pkt_type != PACKET_HOST)
1628 * Count it even if it's bad.
1630 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1632 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1637 if (th->doff < sizeof(struct tcphdr)/4)
1639 if (!pskb_may_pull(skb, th->doff*4))
1642 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1643 tcp_v6_checksum_init(skb) < 0))
1647 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1648 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1649 skb->len - th->doff*4);
1650 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1651 TCP_SKB_CB(skb)->when = 0;
1652 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1653 TCP_SKB_CB(skb)->sacked = 0;
1655 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1656 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1662 if (sk->sk_state == TCP_TIME_WAIT)
1665 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1666 goto discard_and_relse;
1668 if (sk_filter(sk, skb, 0))
1669 goto discard_and_relse;
1675 if (!sock_owned_by_user(sk)) {
1676 if (!tcp_prequeue(sk, skb))
1677 ret = tcp_v6_do_rcv(sk, skb);
1679 sk_add_backlog(sk, skb);
1683 return ret ? -1 : 0;
1686 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1689 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1691 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1693 tcp_v6_send_reset(skb);
1710 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1711 tcp_tw_put((struct tcp_tw_bucket *) sk);
1715 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1716 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1717 tcp_tw_put((struct tcp_tw_bucket *) sk);
1721 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1722 skb, th, skb->len)) {
1727 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1729 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1730 tcp_tw_put((struct tcp_tw_bucket *)sk);
1734 /* Fall through to ACK */
1737 tcp_v6_timewait_ack(sk, skb);
1741 case TCP_TW_SUCCESS:;
1746 static int tcp_v6_rebuild_header(struct sock *sk)
1749 struct dst_entry *dst;
1750 struct ipv6_pinfo *np = inet6_sk(sk);
1752 dst = __sk_dst_check(sk, np->dst_cookie);
1755 struct inet_opt *inet = inet_sk(sk);
1756 struct in6_addr *final_p = NULL, final;
1759 memset(&fl, 0, sizeof(fl));
1760 fl.proto = IPPROTO_TCP;
1761 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1762 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1763 fl.fl6_flowlabel = np->flow_label;
1764 fl.oif = sk->sk_bound_dev_if;
1765 fl.fl_ip_dport = inet->dport;
1766 fl.fl_ip_sport = inet->sport;
1768 if (np->opt && np->opt->srcrt) {
1769 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1770 ipv6_addr_copy(&final, &fl.fl6_dst);
1771 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1775 err = ip6_dst_lookup(sk, &dst, &fl);
1777 sk->sk_route_caps = 0;
1781 ipv6_addr_copy(&fl.fl6_dst, final_p);
1783 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1784 sk->sk_err_soft = -err;
1789 ip6_dst_store(sk, dst, NULL);
1790 sk->sk_route_caps = dst->dev->features &
1791 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1792 tcp_sk(sk)->ext2_header_len = dst->header_len;
1798 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1800 struct sock *sk = skb->sk;
1801 struct inet_opt *inet = inet_sk(sk);
1802 struct ipv6_pinfo *np = inet6_sk(sk);
1804 struct dst_entry *dst;
1806 memset(&fl, 0, sizeof(fl));
1807 fl.proto = IPPROTO_TCP;
1808 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1809 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1810 fl.fl6_flowlabel = np->flow_label;
1811 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1812 fl.oif = sk->sk_bound_dev_if;
1813 fl.fl_ip_sport = inet->sport;
1814 fl.fl_ip_dport = inet->dport;
1816 if (np->opt && np->opt->srcrt) {
1817 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1818 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1821 dst = __sk_dst_check(sk, np->dst_cookie);
1824 int err = ip6_dst_lookup(sk, &dst, &fl);
1827 sk->sk_err_soft = -err;
1831 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1832 sk->sk_route_caps = 0;
1837 ip6_dst_store(sk, dst, NULL);
1838 sk->sk_route_caps = dst->dev->features &
1839 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1840 tcp_sk(sk)->ext2_header_len = dst->header_len;
1843 skb->dst = dst_clone(dst);
1845 /* Restore final destination back after routing done */
1846 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1848 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1851 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1853 struct ipv6_pinfo *np = inet6_sk(sk);
1854 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1856 sin6->sin6_family = AF_INET6;
1857 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1858 sin6->sin6_port = inet_sk(sk)->dport;
1859 /* We do not store received flowlabel for TCP */
1860 sin6->sin6_flowinfo = 0;
1861 sin6->sin6_scope_id = 0;
1862 if (sk->sk_bound_dev_if &&
1863 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1864 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * .remember_stamp hook for native IPv6 sockets.  Not implemented yet:
 * nothing is recorded and 0 is returned unconditionally.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
1873 static struct tcp_func ipv6_specific = {
1874 .queue_xmit = tcp_v6_xmit,
1875 .send_check = tcp_v6_send_check,
1876 .rebuild_header = tcp_v6_rebuild_header,
1877 .conn_request = tcp_v6_conn_request,
1878 .syn_recv_sock = tcp_v6_syn_recv_sock,
1879 .remember_stamp = tcp_v6_remember_stamp,
1880 .net_header_len = sizeof(struct ipv6hdr),
1882 .setsockopt = ipv6_setsockopt,
1883 .getsockopt = ipv6_getsockopt,
1884 .addr2sockaddr = v6_addr2sockaddr,
1885 .sockaddr_len = sizeof(struct sockaddr_in6)
1889 * TCP over IPv4 via INET6 API
1892 static struct tcp_func ipv6_mapped = {
1893 .queue_xmit = ip_queue_xmit,
1894 .send_check = tcp_v4_send_check,
1895 .rebuild_header = tcp_v4_rebuild_header,
1896 .conn_request = tcp_v6_conn_request,
1897 .syn_recv_sock = tcp_v6_syn_recv_sock,
1898 .remember_stamp = tcp_v4_remember_stamp,
1899 .net_header_len = sizeof(struct iphdr),
1901 .setsockopt = ipv6_setsockopt,
1902 .getsockopt = ipv6_getsockopt,
1903 .addr2sockaddr = v6_addr2sockaddr,
1904 .sockaddr_len = sizeof(struct sockaddr_in6)
1909 /* NOTE: A lot of things set to zero explicitly by call to
1910 * sk_alloc() so need not be done here.
1912 static int tcp_v6_init_sock(struct sock *sk)
1914 struct tcp_opt *tp = tcp_sk(sk);
1916 skb_queue_head_init(&tp->out_of_order_queue);
1917 tcp_init_xmit_timers(sk);
1918 tcp_prequeue_init(tp);
1920 tp->rto = TCP_TIMEOUT_INIT;
1921 tp->mdev = TCP_TIMEOUT_INIT;
1923 /* So many TCP implementations out there (incorrectly) count the
1924 * initial SYN frame in their delayed-ACK and congestion control
1925 * algorithms that we must have the following bandaid to talk
1926 * efficiently to them. -DaveM
1930 /* See draft-stevens-tcpca-spec-01 for discussion of the
1931 * initialization of these values.
1933 tp->snd_ssthresh = 0x7fffffff;
1934 tp->snd_cwnd_clamp = ~0;
1935 tp->mss_cache_std = tp->mss_cache = 536;
1937 tp->reordering = sysctl_tcp_reordering;
1939 sk->sk_state = TCP_CLOSE;
1941 tp->af_specific = &ipv6_specific;
1943 sk->sk_write_space = sk_stream_write_space;
1944 sk->sk_use_write_queue = 1;
1946 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1947 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1949 atomic_inc(&tcp_sockets_allocated);
/*
 * .destroy hook of tcpv6_prot: run the TCP teardown shared with IPv4
 * first, then the IPv6-specific one, whose result is returned.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	/* Declared locally; no prototype for it is visible in this file. */
	extern int tcp_v4_destroy_sock(struct sock *sk);
	int ret;

	tcp_v4_destroy_sock(sk);
	ret = inet6_destroy_sock(sk);

	return ret;
}
1962 /* Proc filesystem TCPv6 sock list dumping. */
1963 static void get_openreq6(struct seq_file *seq,
1964 struct sock *sk, struct open_request *req, int i, int uid)
1966 struct in6_addr *dest, *src;
1967 int ttd = req->expires - jiffies;
1972 src = &req->af.v6_req.loc_addr;
1973 dest = &req->af.v6_req.rmt_addr;
1975 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1976 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1978 src->s6_addr32[0], src->s6_addr32[1],
1979 src->s6_addr32[2], src->s6_addr32[3],
1980 ntohs(inet_sk(sk)->sport),
1981 dest->s6_addr32[0], dest->s6_addr32[1],
1982 dest->s6_addr32[2], dest->s6_addr32[3],
1983 ntohs(req->rmt_port),
1985 0,0, /* could print option size, but that is af dependent. */
1986 1, /* timers active (only the expire timer) */
1987 jiffies_to_clock_t(ttd),
1990 0, /* non standard timer */
1991 0, /* open_requests have no inode */
1995 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1997 struct in6_addr *dest, *src;
2000 unsigned long timer_expires;
2001 struct inet_opt *inet = inet_sk(sp);
2002 struct tcp_opt *tp = tcp_sk(sp);
2003 struct ipv6_pinfo *np = inet6_sk(sp);
2006 src = &np->rcv_saddr;
2007 destp = ntohs(inet->dport);
2008 srcp = ntohs(inet->sport);
2009 if (tp->pending == TCP_TIME_RETRANS) {
2011 timer_expires = tp->timeout;
2012 } else if (tp->pending == TCP_TIME_PROBE0) {
2014 timer_expires = tp->timeout;
2015 } else if (timer_pending(&sp->sk_timer)) {
2017 timer_expires = sp->sk_timer.expires;
2020 timer_expires = jiffies;
2024 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2025 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2027 src->s6_addr32[0], src->s6_addr32[1],
2028 src->s6_addr32[2], src->s6_addr32[3], srcp,
2029 dest->s6_addr32[0], dest->s6_addr32[1],
2030 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2032 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2034 jiffies_to_clock_t(timer_expires - jiffies),
2039 atomic_read(&sp->sk_refcnt), sp,
2040 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2041 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2045 static void get_timewait6_sock(struct seq_file *seq,
2046 struct tcp_tw_bucket *tw, int i)
2048 struct in6_addr *dest, *src;
2050 int ttd = tw->tw_ttd - jiffies;
2055 dest = &tw->tw_v6_daddr;
2056 src = &tw->tw_v6_rcv_saddr;
2057 destp = ntohs(tw->tw_dport);
2058 srcp = ntohs(tw->tw_sport);
2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2064 src->s6_addr32[0], src->s6_addr32[1],
2065 src->s6_addr32[2], src->s6_addr32[3], srcp,
2066 dest->s6_addr32[0], dest->s6_addr32[1],
2067 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2068 tw->tw_substate, 0, 0,
2069 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2070 atomic_read(&tw->tw_refcnt), tw);
#ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for the tcp6 listing.  The start token produces
 * the column header; every other element is printed by the helper that
 * matches the iterator state.  Always returns 0.
 *
 * NOTE(review): the header literals are padded to the 6- and 38-column
 * widths of the row format; confirm the exact spacing against the tree.
 */
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		return 0;
	}

	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		/* v is a full socket */
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		/* v is an open request hanging off st->syn_wait_sk */
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		/* v is a time-wait bucket */
		get_timewait6_sock(seq, v, st->num);
		break;
	}

	return 0;
}

static struct file_operations tcp6_seq_fops;

/* Registration record handed to tcp_proc_register()/tcp_proc_unregister(). */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.owner		= THIS_MODULE,
	.seq_fops	= &tcp6_seq_fops,
	.seq_show	= tcp6_seq_show,
};

/* Register the tcp6 listing; returns the result of tcp_proc_register(). */
int __init tcp6_proc_init(void)
{
	int err = tcp_proc_register(&tcp6_seq_afinfo);

	return err;
}

/* Remove the tcp6 listing again. */
void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
2125 struct proto tcpv6_prot = {
2128 .connect = tcp_v6_connect,
2129 .disconnect = tcp_disconnect,
2130 .accept = tcp_accept,
2132 .init = tcp_v6_init_sock,
2133 .destroy = tcp_v6_destroy_sock,
2134 .shutdown = tcp_shutdown,
2135 .setsockopt = tcp_setsockopt,
2136 .getsockopt = tcp_getsockopt,
2137 .sendmsg = tcp_sendmsg,
2138 .recvmsg = tcp_recvmsg,
2139 .backlog_rcv = tcp_v6_do_rcv,
2140 .hash = tcp_v6_hash,
2141 .unhash = tcp_unhash,
2142 .get_port = tcp_v6_get_port,
2143 .enter_memory_pressure = tcp_enter_memory_pressure,
2144 .sockets_allocated = &tcp_sockets_allocated,
2145 .memory_allocated = &tcp_memory_allocated,
2146 .memory_pressure = &tcp_memory_pressure,
2147 .sysctl_mem = sysctl_tcp_mem,
2148 .sysctl_wmem = sysctl_tcp_wmem,
2149 .sysctl_rmem = sysctl_tcp_rmem,
2150 .max_header = MAX_TCP_HEADER,
2151 .slab_obj_size = sizeof(struct tcp6_sock),
2154 static struct inet6_protocol tcpv6_protocol = {
2155 .handler = tcp_v6_rcv,
2156 .err_handler = tcp_v6_err,
2157 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2160 extern struct proto_ops inet6_stream_ops;
2162 static struct inet_protosw tcpv6_protosw = {
2163 .type = SOCK_STREAM,
2164 .protocol = IPPROTO_TCP,
2165 .prot = &tcpv6_prot,
2166 .ops = &inet6_stream_ops,
2169 .flags = INET_PROTOSW_PERMANENT,
2172 void __init tcpv6_init(void)
2174 /* register inet6 protocol */
2175 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2176 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2177 inet6_register_protosw(&tcpv6_protosw);