3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/*
 * Hash a v6 4-tuple into an index for the established-connection hash
 * table.  Only the low 32 bits of each address are mixed in; the final
 * mask assumes tcp_ehash_size is a power of two.
 * NOTE(review): interior lines (braces) are elided in this dump.
 */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
/*
 * Convenience wrapper: hash a socket by its own 4-tuple
 * (rcv_saddr/num vs. daddr/dport) via tcp_v6_hashfn().
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_opt *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/*
 * Walk all sockets bound to the bucket 'tb' and decide whether 'sk'
 * may share the port: no conflict when the sockets are bound to
 * different devices, or when both set SO_REUSEADDR and the existing
 * one is not listening.  The address comparison is delegated to
 * ipv6_rcv_saddr_equal().
 * NOTE(review): the return statements are elided in this dump.
 */
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
/*
 * Bind 'sk' to local port 'snum'.  With snum == 0 an ephemeral port
 * is picked by scanning [low, high] from the global rover under
 * tcp_portalloc_lock; otherwise the requested bucket is looked up and
 * checked with tcp_v6_bind_conflict().  On success the socket is put
 * on the bucket's owner list via tcp_bind_hash().
 * NOTE(review): many interior lines (do{ head, goto labels, returns)
 * are elided in this dump; control flow below is fragmentary.
 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 rover = tcp_port_rover;
144 if ((rover < low) || (rover > high))
146 head = &tcp_bhash[tcp_bhashfn(rover)];
147 spin_lock(&head->lock);
148 tb_for_each(tb, node, &head->chain)
149 if (tb->port == rover)
153 spin_unlock(&head->lock);
154 } while (--remaining > 0);
155 tcp_port_rover = rover;
156 spin_unlock(&tcp_portalloc_lock);
158 /* Exhausted local port range during search? */
163 /* OK, here is the one we will use. */
166 head = &tcp_bhash[tcp_bhashfn(snum)];
167 spin_lock(&head->lock);
168 tb_for_each(tb, node, &head->chain)
169 if (tb->port == snum)
175 if (tb && !hlist_empty(&tb->owners)) {
176 if (tb->fastreuse > 0 && sk->sk_reuse &&
177 sk->sk_state != TCP_LISTEN) {
181 if (tcp_v6_bind_conflict(sk, tb))
187 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
189 if (hlist_empty(&tb->owners)) {
/* First owner of a fresh bucket: record whether later binds may
 * fast-reuse it without a full conflict walk. */
190 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 } else if (tb->fastreuse &&
195 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
199 if (!tcp_sk(sk)->bind_hash)
200 tcp_bind_hash(sk, tb, snum);
201 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
205 spin_unlock(&head->lock);
/*
 * Insert an unhashed socket into the proper lookup table: the
 * listening hash for TCP_LISTEN sockets, otherwise the established
 * hash (caching the slot in sk->sk_hashent).  The matching lock
 * acquire/release lines are elided in this dump.
 */
211 static __inline__ void __tcp_v6_hash(struct sock *sk)
213 struct hlist_head *list;
216 BUG_TRAP(sk_unhashed(sk));
218 if (sk->sk_state == TCP_LISTEN) {
219 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
220 lock = &tcp_lhash_lock;
223 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
224 list = &tcp_ehash[sk->sk_hashent].chain;
225 lock = &tcp_ehash[sk->sk_hashent].lock;
229 __sk_add_node(sk, list);
230 sock_prot_inc_use(sk->sk_prot);
/*
 * Protocol-ops hash entry point.  CLOSE sockets are never hashed;
 * v4-mapped sockets (af_specific == &ipv6_mapped) are presumably
 * handed to the IPv4 hasher on the elided branch — TODO confirm,
 * body lines are missing from this dump.
 */
235 static void tcp_v6_hash(struct sock *sk)
237 if (sk->sk_state != TCP_CLOSE) {
238 struct tcp_opt *tp = tcp_sk(sk);
240 if (tp->af_specific == &ipv6_mapped) {
/*
 * Find the best listening socket for (daddr, hnum, dif).  Candidates
 * are scored: a bound rcv_saddr or bound device must match exactly
 * and raises the score; the highest-scoring socket wins.  The score
 * bookkeeping lines themselves are elided in this dump.
 */
250 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
253 struct hlist_node *node;
254 struct sock *result = NULL;
258 read_lock(&tcp_lhash_lock);
259 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
260 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
261 struct ipv6_pinfo *np = inet6_sk(sk);
264 if (!ipv6_addr_any(&np->rcv_saddr)) {
265 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
269 if (sk->sk_bound_dev_if) {
270 if (sk->sk_bound_dev_if != dif)
278 if (score > hiscore) {
286 read_unlock(&tcp_lhash_lock);
290 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
291 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
293 * The sockhash lock must be held as a reader here.
/*
 * Look up an established (or TIME-WAIT) socket by full 4-tuple.
 * The TIME-WAIT half of the table lives at head + tcp_ehash_size.
 * Reference-taking on the hit path is elided in this dump.
 */
296 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
297 struct in6_addr *daddr, u16 hnum,
300 struct tcp_ehash_bucket *head;
302 struct hlist_node *node;
303 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
306 /* Optimize here for direct hit, only listening connections can
307 * have wildcards anyways.
309 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
310 head = &tcp_ehash[hash];
311 read_lock(&head->lock);
312 sk_for_each(sk, node, &head->chain) {
313 /* For IPV6 do the cheaper port and family tests first. */
314 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
315 goto hit; /* You sunk my battleship! */
317 /* Must check for a TIME_WAIT'er before going to listener hash. */
318 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
319 /* FIXME: acme: check this... */
320 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
/* Compare both 16-bit ports with one 32-bit load of tw_dport. */
322 if(*((__u32 *)&(tw->tw_dport)) == ports &&
323 sk->sk_family == PF_INET6) {
324 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
325 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
326 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 read_unlock(&head->lock);
335 read_unlock(&head->lock);
/*
 * Combined lookup: established/TIME-WAIT table first, falling back
 * to the listener table when no exact 4-tuple match exists.
 */
340 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
/*
 * Exported wrapper around __tcp_v6_lookup(); note dport arrives in
 * network byte order here and is converted with ntohs().  The
 * surrounding lock lines are elided in this dump.
 */
354 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
367 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
371 * Open request hash tables.
/*
 * Jenkins-style hash of the remote address/port (plus the per-listener
 * random value 'rnd' on an elided line) into the SYN queue, which has
 * TCP_SYNQ_HSIZE power-of-two slots.
 */
374 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
378 a = raddr->s6_addr32[0];
379 b = raddr->s6_addr32[1];
380 c = raddr->s6_addr32[2];
382 a += JHASH_GOLDEN_RATIO;
383 b += JHASH_GOLDEN_RATIO;
385 __jhash_mix(a, b, c);
387 a += raddr->s6_addr32[3];
389 __jhash_mix(a, b, c);
391 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Find a pending open_request matching the 4-tuple (and interface,
 * when the request is bound to one) in the listener's SYN table.
 * *prevp is set so the caller can unlink the entry; iif==0 in the
 * request acts as a wildcard.
 */
394 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
395 struct open_request ***prevp,
397 struct in6_addr *raddr,
398 struct in6_addr *laddr,
401 struct tcp_listen_opt *lopt = tp->listen_opt;
402 struct open_request *req, **prev;
404 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
405 (req = *prev) != NULL;
406 prev = &req->dl_next) {
407 if (req->rmt_port == rport &&
408 req->class->family == AF_INET6 &&
409 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
410 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
411 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
412 BUG_TRAP(req->sk == NULL);
/*
 * Compute the TCP-over-IPv6 checksum: pseudo-header folded together
 * with the partial sum 'base' of the segment (csum_ipv6_magic).
 */
421 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
422 struct in6_addr *saddr,
423 struct in6_addr *daddr,
426 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick a secure initial sequence number for a new connection,
 * dispatching on whether the triggering packet was native IPv6 or
 * IPv4 (v4-mapped socket).  Port arguments are on elided lines.
 */
429 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
431 if (skb->protocol == htons(ETH_P_IPV6)) {
432 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
433 skb->nh.ipv6h->saddr.s6_addr32,
437 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * Verify the chosen 4-tuple is unique before completing connect().
 * A TIME-WAIT twin may be recycled (its timestamp state is inherited
 * to keep PAWS safe); a live established twin means -EADDRNOTAVAIL.
 * On success the socket is inserted into the established hash under
 * the bucket lock.  Several goto labels/returns are elided here.
 */
444 static int tcp_v6_check_established(struct sock *sk)
446 struct inet_opt *inet = inet_sk(sk);
447 struct ipv6_pinfo *np = inet6_sk(sk);
448 struct in6_addr *daddr = &np->rcv_saddr;
449 struct in6_addr *saddr = &np->daddr;
450 int dif = sk->sk_bound_dev_if;
451 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
452 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
453 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
455 struct hlist_node *node;
456 struct tcp_tw_bucket *tw;
458 write_lock_bh(&head->lock);
460 /* Check TIME-WAIT sockets first. */
461 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
462 tw = (struct tcp_tw_bucket*)sk2;
464 if(*((__u32 *)&(tw->tw_dport)) == ports &&
465 sk2->sk_family == PF_INET6 &&
466 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
467 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
468 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
469 struct tcp_opt *tp = tcp_sk(sk);
471 if (tw->tw_ts_recent_stamp) {
472 /* See comment in tcp_ipv4.c */
/* Jump write_seq well past the old connection's snd_nxt so the
 * recycled tuple cannot confuse the peer. */
473 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
476 tp->ts_recent = tw->tw_ts_recent;
477 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
486 /* And established part... */
487 sk_for_each(sk2, node, &head->chain) {
488 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
493 BUG_TRAP(sk_unhashed(sk));
494 __sk_add_node(sk, &head->chain);
495 sk->sk_hashent = hash;
496 sock_prot_inc_use(sk->sk_prot);
497 write_unlock_bh(&head->lock);
500 /* Silly. Should hash-dance instead... */
502 tcp_tw_deschedule(tw);
503 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
511 write_unlock_bh(&head->lock);
512 return -EADDRNOTAVAIL;
/*
 * Bind (if still unbound) and hash a connecting socket.  If this
 * socket is the sole owner of its bind bucket it can be hashed
 * directly (fast path); otherwise uniqueness must be proven via
 * tcp_v6_check_established().
 */
515 static int tcp_v6_hash_connect(struct sock *sk)
517 struct tcp_bind_hashbucket *head;
518 struct tcp_bind_bucket *tb;
521 if (inet_sk(sk)->num == 0) {
522 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
525 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
528 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
531 spin_lock_bh(&head->lock);
/* Sole owner of the bucket: no other socket can collide on this
 * local port, so skip the established-table walk. */
533 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
535 spin_unlock_bh(&head->lock);
538 spin_unlock_bh(&head->lock);
539 return tcp_v6_check_established(sk);
/* Incoming interface index of an IPv6 skb, from the IP6 control block. */
543 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
545 return IP6CB(skb)->iif;
/*
 * connect() for TCPv6.  Validates the sockaddr, resolves flow labels,
 * maps in6addr_any to loopback, handles link-local scope ids, and —
 * for v4-mapped destinations — re-dispatches to tcp_v4_connect() with
 * swapped af_specific ops.  Otherwise it performs the route/xfrm
 * lookup, binds and hashes the socket, picks a secure ISN and sends
 * the SYN via tcp_connect().
 * NOTE(review): many error-path lines (returns, failure labels) are
 * elided in this dump; the visible flow is fragmentary.
 */
548 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
551 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
552 struct inet_opt *inet = inet_sk(sk);
553 struct ipv6_pinfo *np = inet6_sk(sk);
554 struct tcp_opt *tp = tcp_sk(sk);
555 struct in6_addr *saddr = NULL, *final_p = NULL, final;
557 struct dst_entry *dst;
561 if (addr_len < SIN6_LEN_RFC2133)
564 if (usin->sin6_family != AF_INET6)
565 return(-EAFNOSUPPORT);
567 memset(&fl, 0, sizeof(fl));
570 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
571 IP6_ECN_flow_init(fl.fl6_flowlabel);
572 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
573 struct ip6_flowlabel *flowlabel;
574 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
575 if (flowlabel == NULL)
/* A flow label the socket owns pins the destination address. */
577 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
578 fl6_sock_release(flowlabel);
583 * connect() to INADDR_ANY means loopback (BSD'ism).
586 if(ipv6_addr_any(&usin->sin6_addr))
587 usin->sin6_addr.s6_addr[15] = 0x1;
589 addr_type = ipv6_addr_type(&usin->sin6_addr);
591 if(addr_type & IPV6_ADDR_MULTICAST)
594 if (addr_type&IPV6_ADDR_LINKLOCAL) {
595 if (addr_len >= sizeof(struct sockaddr_in6) &&
596 usin->sin6_scope_id) {
597 /* If interface is set while binding, indices
600 if (sk->sk_bound_dev_if &&
601 sk->sk_bound_dev_if != usin->sin6_scope_id)
604 sk->sk_bound_dev_if = usin->sin6_scope_id;
607 /* Connect to link-local address requires an interface */
608 if (!sk->sk_bound_dev_if)
/* Re-connecting to a different peer: stale timestamp state must
 * be cleared so PAWS does not reject the new connection. */
612 if (tp->ts_recent_stamp &&
613 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
615 tp->ts_recent_stamp = 0;
619 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
620 np->flow_label = fl.fl6_flowlabel;
626 if (addr_type == IPV6_ADDR_MAPPED) {
627 u32 exthdrlen = tp->ext_header_len;
628 struct sockaddr_in sin;
630 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
632 if (__ipv6_only_sock(sk))
635 sin.sin_family = AF_INET;
636 sin.sin_port = usin->sin6_port;
637 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
/* Temporarily switch to the v4-mapped operation vector before
 * delegating; restored on the (elided) failure path below. */
639 tp->af_specific = &ipv6_mapped;
640 sk->sk_backlog_rcv = tcp_v4_do_rcv;
642 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
645 tp->ext_header_len = exthdrlen;
646 tp->af_specific = &ipv6_specific;
647 sk->sk_backlog_rcv = tcp_v6_do_rcv;
650 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
652 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
659 if (!ipv6_addr_any(&np->rcv_saddr))
660 saddr = &np->rcv_saddr;
662 fl.proto = IPPROTO_TCP;
663 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
664 ipv6_addr_copy(&fl.fl6_src,
665 (saddr ? saddr : &np->saddr));
666 fl.oif = sk->sk_bound_dev_if;
667 fl.fl_ip_dport = usin->sin6_port;
668 fl.fl_ip_sport = inet->sport;
/* With a type-0 routing header, route towards the first hop but
 * remember the real destination in 'final'. */
670 if (np->opt && np->opt->srcrt) {
671 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
672 ipv6_addr_copy(&final, &fl.fl6_dst);
673 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
677 err = ip6_dst_lookup(sk, &dst, &fl);
681 ipv6_addr_copy(&fl.fl6_dst, final_p);
683 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
690 ipv6_addr_copy(&np->rcv_saddr, saddr);
693 /* set the source address */
694 ipv6_addr_copy(&np->saddr, saddr);
695 inet->rcv_saddr = LOOPBACK4_IPV6;
697 ip6_dst_store(sk, dst, NULL);
698 sk->sk_route_caps = dst->dev->features &
699 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
701 tp->ext_header_len = 0;
703 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
704 tp->ext2_header_len = dst->header_len;
706 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
708 inet->dport = usin->sin6_port;
710 tcp_set_state(sk, TCP_SYN_SENT);
711 err = tcp_v6_hash_connect(sk);
716 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
721 err = tcp_connect(sk);
728 tcp_set_state(sk, TCP_CLOSE);
732 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.  Locates the affected socket,
 * validates the echoed sequence number against the send window,
 * handles PKT_TOOBIG by re-routing and syncing the MSS, and converts
 * other ICMP errors into socket errors (immediately, or softly when
 * the socket is owned by user context).  Also matches errors against
 * pending open_requests on listeners.
 * NOTE(review): lock/unlock and several goto/return lines are elided
 * in this dump.
 */
736 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
737 int type, int code, int offset, __u32 info)
739 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
740 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
741 struct ipv6_pinfo *np;
747 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
750 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
754 if (sk->sk_state == TCP_TIME_WAIT) {
755 tcp_tw_put((struct tcp_tw_bucket*)sk);
760 if (sock_owned_by_user(sk))
761 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
763 if (sk->sk_state == TCP_CLOSE)
767 seq = ntohl(th->seq);
768 if (sk->sk_state != TCP_LISTEN &&
769 !between(seq, tp->snd_una, tp->snd_nxt)) {
770 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
776 if (type == ICMPV6_PKT_TOOBIG) {
777 struct dst_entry *dst = NULL;
779 if (sock_owned_by_user(sk))
781 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
784 /* icmp should have updated the destination cache entry */
785 dst = __sk_dst_check(sk, np->dst_cookie);
788 struct inet_opt *inet = inet_sk(sk);
791 /* BUGGG_FUTURE: Again, it is not clear how
792 to handle rthdr case. Ignore this complexity
795 memset(&fl, 0, sizeof(fl));
796 fl.proto = IPPROTO_TCP;
797 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
798 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
799 fl.oif = sk->sk_bound_dev_if;
800 fl.fl_ip_dport = inet->dport;
801 fl.fl_ip_sport = inet->sport;
803 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
804 sk->sk_err_soft = -err;
808 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
809 sk->sk_err_soft = -err;
/* Shrink the MSS to the new path MTU and retransmit what no
 * longer fits; otherwise the retransmit timer copes. */
816 if (tp->pmtu_cookie > dst_pmtu(dst)) {
817 tcp_sync_mss(sk, dst_pmtu(dst));
818 tcp_simple_retransmit(sk);
819 } /* else let the usual retransmit timer handle it */
824 icmpv6_err_convert(type, code, &err);
826 /* Might be for an open_request */
827 switch (sk->sk_state) {
828 struct open_request *req, **prev;
830 if (sock_owned_by_user(sk))
833 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
834 &hdr->saddr, tcp_v6_iif(skb));
838 /* ICMPs are not backlogged, hence we cannot get
839 * an established socket here.
841 BUG_TRAP(req->sk == NULL);
843 if (seq != req->snt_isn) {
844 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
848 tcp_synq_drop(sk, req, prev);
852 case TCP_SYN_RECV: /* Cannot happen.
853 It can, it SYNs are crossed. --ANK */
854 if (!sock_owned_by_user(sk)) {
855 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
857 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
861 sk->sk_err_soft = err;
865 if (!sock_owned_by_user(sk) && np->recverr) {
867 sk->sk_error_report(sk);
869 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for a pending open_request.  Honors an
 * inverted routing header from the SYN when rxopt asks for it, does a
 * route + xfrm lookup, checksums the segment against the request's
 * addresses and sends it with ip6_xmit().  Any txoptions allocated
 * here (and not shared with np->opt) are freed before returning.
 */
877 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
878 struct dst_entry *dst)
880 struct ipv6_pinfo *np = inet6_sk(sk);
881 struct sk_buff * skb;
882 struct ipv6_txoptions *opt = NULL;
883 struct in6_addr * final_p = NULL, final;
887 memset(&fl, 0, sizeof(fl));
888 fl.proto = IPPROTO_TCP;
889 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
890 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
891 fl.fl6_flowlabel = 0;
892 fl.oif = req->af.v6_req.iif;
893 fl.fl_ip_dport = req->rmt_port;
894 fl.fl_ip_sport = inet_sk(sk)->sport;
899 np->rxopt.bits.srcrt == 2 &&
900 req->af.v6_req.pktopts) {
901 struct sk_buff *pktopts = req->af.v6_req.pktopts;
902 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
/* Reverse the source route from the incoming SYN so the SYN-ACK
 * retraces the same path. */
904 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
907 if (opt && opt->srcrt) {
908 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
909 ipv6_addr_copy(&final, &fl.fl6_dst);
910 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
914 err = ip6_dst_lookup(sk, &dst, &fl);
918 ipv6_addr_copy(&fl.fl6_dst, final_p);
919 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
923 skb = tcp_make_synack(sk, dst, req);
925 struct tcphdr *th = skb->h.th;
927 th->check = tcp_v6_check(th, skb->len,
928 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
929 csum_partial((char *)th, skb->len, skb->csum));
931 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
932 err = ip6_xmit(sk, skb, &fl, opt, 0);
/* Congestion-notification drop from the stack is not an error. */
933 if (err == NET_XMIT_CN)
939 if (opt && opt != np->opt)
940 sock_kfree_s(sk, opt, opt->tot_len);
/* open_request destructor: drop the pktoptions skb held by the request. */
944 static void tcp_v6_or_free(struct open_request *req)
946 if (req->af.v6_req.pktopts)
947 kfree_skb(req->af.v6_req.pktopts);
/* open_request operations vector for IPv6 connection requests. */
950 static struct or_calltable or_ipv6 = {
952 .rtx_syn_ack = tcp_v6_send_synack,
953 .send_ack = tcp_v6_or_send_ack,
954 .destructor = tcp_v6_or_free,
955 .send_reset = tcp_v6_send_reset
/*
 * True if the incoming skb carries any IPv6 option the socket has
 * subscribed to via IPV6_PKTOPTIONS (hop-by-hop, flow label, routing
 * header, destination options) — i.e. the skb must be retained.
 */
958 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
960 struct ipv6_pinfo *np = inet6_sk(sk);
961 struct inet6_skb_parm *opt = IP6CB(skb);
964 if ((opt->hop && np->rxopt.bits.hopopts) ||
965 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
966 np->rxopt.bits.rxflow) ||
967 (opt->srcrt && np->rxopt.bits.srcrt) ||
968 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in the TCP checksum for an outgoing segment.  With hardware
 * checksum offload (CHECKSUM_HW) only the pseudo-header sum is
 * computed and the offset of the check field stashed in skb->csum;
 * otherwise the full checksum is computed in software.
 */
975 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
978 struct ipv6_pinfo *np = inet6_sk(sk);
980 if (skb->ip_summed == CHECKSUM_HW) {
981 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
982 skb->csum = offsetof(struct tcphdr, check);
984 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
985 csum_partial((char *)th, th->doff<<2,
/*
 * Send a RST in response to 'skb' without an owning socket.  Swaps
 * the addresses/ports of the offending segment, picks seq/ack from it
 * (ACK-bit handling is on elided lines), routes the reply and emits
 * it with ip6_xmit(NULL, ...).  Multicast destinations are ignored.
 */
991 static void tcp_v6_send_reset(struct sk_buff *skb)
993 struct tcphdr *th = skb->h.th, *t1;
994 struct sk_buff *buff;
1000 if (!ipv6_unicast_destination(skb))
1004 * We need to grab some memory, and put together an RST,
1005 * and then put it into the queue to be sent.
1008 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1013 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1015 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1017 /* Swap the send and the receive. */
1018 memset(t1, 0, sizeof(*t1));
1019 t1->dest = th->source;
1020 t1->source = th->dest;
1021 t1->doff = sizeof(*t1)/4;
1025 t1->seq = th->ack_seq;
/* No ACK in the offender: ACK everything it consumed instead. */
1028 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1029 + skb->len - (th->doff<<2));
1032 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1034 memset(&fl, 0, sizeof(fl));
1035 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1036 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1038 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1039 sizeof(*t1), IPPROTO_TCP,
1042 fl.proto = IPPROTO_TCP;
1043 fl.oif = tcp_v6_iif(skb);
1044 fl.fl_ip_dport = t1->dest;
1045 fl.fl_ip_sport = t1->source;
1047 /* sk = NULL, but it is safe for now. RST socket required. */
1048 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1050 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1051 dst_release(buff->dst);
1055 ip6_xmit(NULL, buff, &fl, NULL, 0);
1056 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1057 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * Send a bare (sock-less) ACK — used for TIME-WAIT and open_request
 * replies.  When 'ts' is non-zero a TCP timestamp option is appended
 * (the tot_len adjustment and ts echo line are elided in this dump).
 * Addressing, routing and transmission mirror tcp_v6_send_reset().
 */
1064 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1066 struct tcphdr *th = skb->h.th, *t1;
1067 struct sk_buff *buff;
1069 int tot_len = sizeof(struct tcphdr);
1074 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1079 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1081 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1083 /* Swap the send and the receive. */
1084 memset(t1, 0, sizeof(*t1));
1085 t1->dest = th->source;
1086 t1->source = th->dest;
1087 t1->doff = tot_len/4;
1088 t1->seq = htonl(seq);
1089 t1->ack_seq = htonl(ack);
1091 t1->window = htons(win);
1094 u32 *ptr = (u32*)(t1 + 1);
1095 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1096 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1097 *ptr++ = htonl(tcp_time_stamp);
1101 buff->csum = csum_partial((char *)t1, tot_len, 0);
1103 memset(&fl, 0, sizeof(fl));
1104 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1105 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1107 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1108 tot_len, IPPROTO_TCP,
1111 fl.proto = IPPROTO_TCP;
1112 fl.oif = tcp_v6_iif(skb);
1113 fl.fl_ip_dport = t1->dest;
1114 fl.fl_ip_sport = t1->source;
1116 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1117 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1118 dst_release(buff->dst);
1121 ip6_xmit(NULL, buff, &fl, NULL, 0);
1122 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME-WAIT socket, echoing its stored state. */
1129 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1131 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1133 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1134 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* Re-ACK a retransmitted SYN for a pending open_request (SYN-ACK state). */
1139 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1141 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/*
 * For a segment hitting a listening socket: first try a matching
 * pending open_request (handled by tcp_check_req), then an already-
 * established child; a TIME-WAIT hit is released.  The SYN-cookie
 * path is compiled out (#if 0).
 */
1145 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1147 struct open_request *req, **prev;
1148 struct tcphdr *th = skb->h.th;
1149 struct tcp_opt *tp = tcp_sk(sk);
1152 /* Find possible connection requests. */
1153 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1154 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1156 return tcp_check_req(sk, skb, req, prev);
1158 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1160 &skb->nh.ipv6h->daddr,
1165 if (nsk->sk_state != TCP_TIME_WAIT) {
1169 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1173 #if 0 /*def CONFIG_SYN_COOKIES*/
1174 if (!th->rst && !th->syn && th->ack)
1175 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Queue a new open_request on the listener's SYN table, arming its
 * initial SYN-ACK timeout; insertion is done under syn_wait_lock.
 */
1180 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1182 struct tcp_opt *tp = tcp_sk(sk);
1183 struct tcp_listen_opt *lopt = tp->listen_opt;
1184 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1187 req->expires = jiffies + TCP_TIMEOUT_INIT;
1189 req->dl_next = lopt->syn_table[h];
1191 write_lock(&tp->syn_wait_lock);
1192 lopt->syn_table[h] = req;
1193 write_unlock(&tp->syn_wait_lock);
1195 #ifdef CONFIG_ACCEPT_QUEUES
1196 tcp_synq_added(sk, req);
1203 /* FIXME: this is substantially similar to the ipv4 code.
1204 * Can some kind of merge be done? -- erics
/*
 * Handle an incoming SYN on a listening socket: drop under synflood
 * or when the accept queue is full, allocate and initialize an
 * open_request (options parsed from the SYN, pktoptions retained if
 * subscribed), pick an ISN, send the SYN-ACK and queue the request.
 * IPv4 packets are delegated to tcp_v4_conn_request().
 * NOTE(review): drop/goto labels and the return 0 success path are
 * elided in this dump.
 */
1206 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1208 struct ipv6_pinfo *np = inet6_sk(sk);
1209 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1210 struct open_request *req = NULL;
1211 __u32 isn = TCP_SKB_CB(skb)->when;
1212 #ifdef CONFIG_ACCEPT_QUEUES
1216 if (skb->protocol == htons(ETH_P_IP))
1217 return tcp_v4_conn_request(sk, skb);
1219 if (!ipv6_unicast_destination(skb))
1224 * There are no SYN attacks on IPv6, yet...
1226 if (tcp_synq_is_full(sk) && !isn) {
1227 if (net_ratelimit())
1228 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1232 #ifdef CONFIG_ACCEPT_QUEUES
1233 class = (skb->nfmark <= 0) ? 0 :
1234 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1236 * Accept only if the class has shares set or if the default class
1237 * i.e. class 0 has shares
1239 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1240 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1246 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1248 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1253 req = tcp_openreq_alloc();
1257 tcp_clear_options(&tmptp);
1258 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1259 tmptp.user_mss = tp->user_mss;
1261 tcp_parse_options(skb, &tmptp, 0);
1263 tmptp.tstamp_ok = tmptp.saw_tstamp;
1264 tcp_openreq_init(req, &tmptp, skb);
1265 #ifdef CONFIG_ACCEPT_QUEUES
1266 req->acceptq_class = class;
1267 req->acceptq_time_stamp = jiffies;
1269 req->class = &or_ipv6;
1270 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1271 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1272 TCP_ECN_create_request(req, skb->h.th);
1273 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive if the child will want its IPv6 options. */
1274 if (ipv6_opt_accepted(sk, skb) ||
1275 np->rxopt.bits.rxinfo ||
1276 np->rxopt.bits.rxhlim) {
1277 atomic_inc(&skb->users);
1278 req->af.v6_req.pktopts = skb;
1280 req->af.v6_req.iif = sk->sk_bound_dev_if;
1282 /* So that link locals have meaning */
1283 if (!sk->sk_bound_dev_if &&
1284 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1285 req->af.v6_req.iif = tcp_v6_iif(skb);
1288 isn = tcp_v6_init_sequence(sk,skb);
1292 if (tcp_v6_send_synack(sk, req, NULL))
1295 tcp_v6_synq_add(sk, req);
1301 tcp_openreq_free(req);
1303 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1304 return 0; /* don't send reset */
/*
 * Create the child socket when the 3-way handshake completes.  The
 * v4-mapped branch delegates to tcp_v4_syn_recv_sock() and then
 * converts the child's addressing/ops to the mapped form.  The native
 * branch routes (honoring an inverted rthdr), clones the child with
 * tcp_create_openreq_child(), copies addresses/options from the
 * request, transfers pktoptions, duplicates txoptions, sizes the MSS
 * from the route, and hashes the child + inherits the port.
 * NOTE(review): several goto labels and early returns are elided in
 * this dump; the two halves below are stitched from sampled lines.
 */
1307 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1308 struct open_request *req,
1309 struct dst_entry *dst)
1311 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1312 struct tcp6_sock *newtcp6sk;
1313 struct inet_opt *newinet;
1314 struct tcp_opt *newtp;
1316 struct ipv6_txoptions *opt;
1318 if (skb->protocol == htons(ETH_P_IP)) {
1323 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1328 newtcp6sk = (struct tcp6_sock *)newsk;
1329 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1331 newinet = inet_sk(newsk);
1332 newnp = inet6_sk(newsk);
1333 newtp = tcp_sk(newsk);
1335 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Express the child's v4 addresses as ::ffff:a.b.c.d mapped form. */
1337 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1340 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1343 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1345 newtp->af_specific = &ipv6_mapped;
1346 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1347 newnp->pktoptions = NULL;
1349 newnp->mcast_oif = tcp_v6_iif(skb);
1350 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1352 /* Charge newly allocated IPv6 socket. Though it is mapped,
1355 #ifdef INET_REFCNT_DEBUG
1356 atomic_inc(&inet6_sock_nr);
1359 /* It is tricky place. Until this moment IPv4 tcp
1360 worked with IPv6 af_tcp.af_specific.
1363 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1370 #ifdef CONFIG_ACCEPT_QUEUES
1371 if (sk_acceptq_is_full(sk, req->acceptq_class))
1373 if (sk_acceptq_is_full(sk))
1377 if (np->rxopt.bits.srcrt == 2 &&
1378 opt == NULL && req->af.v6_req.pktopts) {
1379 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1381 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1385 struct in6_addr *final_p = NULL, final;
1388 memset(&fl, 0, sizeof(fl));
1389 fl.proto = IPPROTO_TCP;
1390 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1391 if (opt && opt->srcrt) {
1392 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1393 ipv6_addr_copy(&final, &fl.fl6_dst);
1394 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1397 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1398 fl.oif = sk->sk_bound_dev_if;
1399 fl.fl_ip_dport = req->rmt_port;
1400 fl.fl_ip_sport = inet_sk(sk)->sport;
1402 if (ip6_dst_lookup(sk, &dst, &fl))
1406 ipv6_addr_copy(&fl.fl6_dst, final_p);
1408 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1412 newsk = tcp_create_openreq_child(sk, req, skb);
1416 /* Charge newly allocated IPv6 socket */
1417 #ifdef INET_REFCNT_DEBUG
1418 atomic_inc(&inet6_sock_nr);
1421 ip6_dst_store(newsk, dst, NULL);
1422 newsk->sk_route_caps = dst->dev->features &
1423 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1425 newtcp6sk = (struct tcp6_sock *)newsk;
1426 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1428 newtp = tcp_sk(newsk);
1429 newinet = inet_sk(newsk);
1430 newnp = inet6_sk(newsk);
1432 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1434 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1435 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1436 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1437 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1439 /* Now IPv6 options...
1441 First: no IPv4 options.
1443 newinet->opt = NULL;
1446 newnp->rxopt.all = np->rxopt.all;
1448 /* Clone pktoptions received with SYN */
1449 newnp->pktoptions = NULL;
1450 if (req->af.v6_req.pktopts) {
1451 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1453 kfree_skb(req->af.v6_req.pktopts);
1454 req->af.v6_req.pktopts = NULL;
1455 if (newnp->pktoptions)
1456 skb_set_owner_r(newnp->pktoptions, newsk);
1459 newnp->mcast_oif = tcp_v6_iif(skb);
1460 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1462 /* Clone native IPv6 options from listening socket (if any)
1464 Yes, keeping reference count would be much more clever,
1465 but we make one more one thing there: reattach optmem
1469 newnp->opt = ipv6_dup_options(newsk, opt);
1471 sock_kfree_s(sk, opt, opt->tot_len);
1474 newtp->ext_header_len = 0;
1476 newtp->ext_header_len = newnp->opt->opt_nflen +
1477 newnp->opt->opt_flen;
1478 newtp->ext2_header_len = dst->header_len;
1480 tcp_sync_mss(newsk, dst_pmtu(dst));
1481 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1482 tcp_initialize_rcv_mss(newsk);
1484 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1486 __tcp_v6_hash(newsk);
1487 tcp_inherit_port(sk, newsk);
1492 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1494 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1495 if (opt && opt != np->opt)
1496 sock_kfree_s(sk, opt, opt->tot_len);
1501 static int tcp_v6_checksum_init(struct sk_buff *skb)
1503 if (skb->ip_summed == CHECKSUM_HW) {
1504 skb->ip_summed = CHECKSUM_UNNECESSARY;
1505 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1506 &skb->nh.ipv6h->daddr,skb->csum))
1508 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1510 if (skb->len <= 76) {
1511 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1512 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1514 skb->ip_summed = CHECKSUM_UNNECESSARY;
1516 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1517 &skb->nh.ipv6h->daddr,0);
1522 /* The socket must have its spinlock held when we get
1525 * We have a potential double-lock case here, so even when
1526 * doing backlog processing we use the BH locking scheme.
1527 * This is because we cannot sleep with the original spinlock
1530 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1532 struct ipv6_pinfo *np = inet6_sk(sk);
1534 struct sk_buff *opt_skb = NULL;
1536 /* Imagine: socket is IPv6. IPv4 packet arrives,
1537 goes to IPv4 receive handler and backlogged.
1538 From backlog it always goes here. Kerboom...
1539 Fortunately, tcp_rcv_established and rcv_established
1540 handle them correctly, but it is not case with
1541 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1544 if (skb->protocol == htons(ETH_P_IP))
1545 return tcp_v4_do_rcv(sk, skb);
1547 if (sk_filter(sk, skb, 0))
1551 * socket locking is here for SMP purposes as backlog rcv
1552 * is currently called with bh processing disabled.
1555 /* Do Stevens' IPV6_PKTOPTIONS.
1557 Yes, guys, it is the only place in our code, where we
1558 may make it not affecting IPv4.
1559 The rest of code is protocol independent,
1560 and I do not like idea to uglify IPv4.
1562 Actually, all the idea behind IPV6_PKTOPTIONS
1563 looks not very well thought. For now we latch
1564 options, received in the last packet, enqueued
1565 by tcp. Feel free to propose better solution.
1569 opt_skb = skb_clone(skb, GFP_ATOMIC);
1571 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1572 TCP_CHECK_TIMER(sk);
1573 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1575 TCP_CHECK_TIMER(sk);
1577 goto ipv6_pktoptions;
1581 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1584 if (sk->sk_state == TCP_LISTEN) {
1585 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1590 * Queue it on the new socket if the new socket is active,
1591 * otherwise we just shortcircuit this and continue with
1595 if (tcp_child_process(sk, nsk, skb))
1598 __kfree_skb(opt_skb);
1603 TCP_CHECK_TIMER(sk);
1604 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1606 TCP_CHECK_TIMER(sk);
1608 goto ipv6_pktoptions;
1612 tcp_v6_send_reset(skb);
1615 __kfree_skb(opt_skb);
1619 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1624 /* Do you ask, what is it?
1626 1. skb was enqueued by tcp.
1627 2. skb is added to tail of read queue, rather than out of order.
1628 3. socket is not in passive state.
1629 4. Finally, it really contains options, which user wants to receive.
1632 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1633 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1634 if (np->rxopt.bits.rxinfo)
1635 np->mcast_oif = tcp_v6_iif(opt_skb);
1636 if (np->rxopt.bits.rxhlim)
1637 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1638 if (ipv6_opt_accepted(sk, opt_skb)) {
1639 skb_set_owner_r(opt_skb, sk);
1640 opt_skb = xchg(&np->pktoptions, opt_skb);
1642 __kfree_skb(opt_skb);
1643 opt_skb = xchg(&np->pktoptions, NULL);
1652 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1654 struct sk_buff *skb = *pskb;
1659 if (skb->pkt_type != PACKET_HOST)
1663 * Count it even if it's bad.
1665 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1667 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1672 if (th->doff < sizeof(struct tcphdr)/4)
1674 if (!pskb_may_pull(skb, th->doff*4))
1677 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1678 tcp_v6_checksum_init(skb) < 0))
1682 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1683 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1684 skb->len - th->doff*4);
1685 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1686 TCP_SKB_CB(skb)->when = 0;
1687 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1688 TCP_SKB_CB(skb)->sacked = 0;
1690 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1691 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1697 if (sk->sk_state == TCP_TIME_WAIT)
1700 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1701 goto discard_and_relse;
1703 if (sk_filter(sk, skb, 0))
1704 goto discard_and_relse;
1710 if (!sock_owned_by_user(sk)) {
1711 if (!tcp_prequeue(sk, skb))
1712 ret = tcp_v6_do_rcv(sk, skb);
1714 sk_add_backlog(sk, skb);
1718 return ret ? -1 : 0;
1721 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1724 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1726 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1728 tcp_v6_send_reset(skb);
1745 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1746 tcp_tw_put((struct tcp_tw_bucket *) sk);
1750 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1751 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1752 tcp_tw_put((struct tcp_tw_bucket *) sk);
1756 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1757 skb, th, skb->len)) {
1762 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1764 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1765 tcp_tw_put((struct tcp_tw_bucket *)sk);
1769 /* Fall through to ACK */
1772 tcp_v6_timewait_ack(sk, skb);
1776 case TCP_TW_SUCCESS:;
1781 static int tcp_v6_rebuild_header(struct sock *sk)
1784 struct dst_entry *dst;
1785 struct ipv6_pinfo *np = inet6_sk(sk);
1787 dst = __sk_dst_check(sk, np->dst_cookie);
1790 struct inet_opt *inet = inet_sk(sk);
1791 struct in6_addr *final_p = NULL, final;
1794 memset(&fl, 0, sizeof(fl));
1795 fl.proto = IPPROTO_TCP;
1796 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1797 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1798 fl.fl6_flowlabel = np->flow_label;
1799 fl.oif = sk->sk_bound_dev_if;
1800 fl.fl_ip_dport = inet->dport;
1801 fl.fl_ip_sport = inet->sport;
1803 if (np->opt && np->opt->srcrt) {
1804 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1805 ipv6_addr_copy(&final, &fl.fl6_dst);
1806 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1810 err = ip6_dst_lookup(sk, &dst, &fl);
1812 sk->sk_route_caps = 0;
1816 ipv6_addr_copy(&fl.fl6_dst, final_p);
1818 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1819 sk->sk_err_soft = -err;
1824 ip6_dst_store(sk, dst, NULL);
1825 sk->sk_route_caps = dst->dev->features &
1826 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1827 tcp_sk(sk)->ext2_header_len = dst->header_len;
1833 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1835 struct sock *sk = skb->sk;
1836 struct inet_opt *inet = inet_sk(sk);
1837 struct ipv6_pinfo *np = inet6_sk(sk);
1839 struct dst_entry *dst;
1840 struct in6_addr *final_p = NULL, final;
1842 memset(&fl, 0, sizeof(fl));
1843 fl.proto = IPPROTO_TCP;
1844 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1845 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1846 fl.fl6_flowlabel = np->flow_label;
1847 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1848 fl.oif = sk->sk_bound_dev_if;
1849 fl.fl_ip_sport = inet->sport;
1850 fl.fl_ip_dport = inet->dport;
1852 if (np->opt && np->opt->srcrt) {
1853 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1854 ipv6_addr_copy(&final, &fl.fl6_dst);
1855 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1859 dst = __sk_dst_check(sk, np->dst_cookie);
1862 int err = ip6_dst_lookup(sk, &dst, &fl);
1865 sk->sk_err_soft = -err;
1870 ipv6_addr_copy(&fl.fl6_dst, final_p);
1872 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1873 sk->sk_route_caps = 0;
1878 ip6_dst_store(sk, dst, NULL);
1879 sk->sk_route_caps = dst->dev->features &
1880 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1881 tcp_sk(sk)->ext2_header_len = dst->header_len;
1884 skb->dst = dst_clone(dst);
1886 /* Restore final destination back after routing done */
1887 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1889 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1892 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1894 struct ipv6_pinfo *np = inet6_sk(sk);
1895 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1897 sin6->sin6_family = AF_INET6;
1898 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1899 sin6->sin6_port = inet_sk(sk)->dport;
1900 /* We do not store received flowlabel for TCP */
1901 sin6->sin6_flowinfo = 0;
1902 sin6->sin6_scope_id = 0;
1903 if (sk->sk_bound_dev_if &&
1904 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1905 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1908 static int tcp_v6_remember_stamp(struct sock *sk)
1910 /* Alas, not yet... */
1914 static struct tcp_func ipv6_specific = {
1915 .queue_xmit = tcp_v6_xmit,
1916 .send_check = tcp_v6_send_check,
1917 .rebuild_header = tcp_v6_rebuild_header,
1918 .conn_request = tcp_v6_conn_request,
1919 .syn_recv_sock = tcp_v6_syn_recv_sock,
1920 .remember_stamp = tcp_v6_remember_stamp,
1921 .net_header_len = sizeof(struct ipv6hdr),
1923 .setsockopt = ipv6_setsockopt,
1924 .getsockopt = ipv6_getsockopt,
1925 .addr2sockaddr = v6_addr2sockaddr,
1926 .sockaddr_len = sizeof(struct sockaddr_in6)
1930 * TCP over IPv4 via INET6 API
1933 static struct tcp_func ipv6_mapped = {
1934 .queue_xmit = ip_queue_xmit,
1935 .send_check = tcp_v4_send_check,
1936 .rebuild_header = tcp_v4_rebuild_header,
1937 .conn_request = tcp_v6_conn_request,
1938 .syn_recv_sock = tcp_v6_syn_recv_sock,
1939 .remember_stamp = tcp_v4_remember_stamp,
1940 .net_header_len = sizeof(struct iphdr),
1942 .setsockopt = ipv6_setsockopt,
1943 .getsockopt = ipv6_getsockopt,
1944 .addr2sockaddr = v6_addr2sockaddr,
1945 .sockaddr_len = sizeof(struct sockaddr_in6)
1950 /* NOTE: A lot of things set to zero explicitly by call to
1951 * sk_alloc() so need not be done here.
1953 static int tcp_v6_init_sock(struct sock *sk)
1955 struct tcp_opt *tp = tcp_sk(sk);
1957 skb_queue_head_init(&tp->out_of_order_queue);
1958 tcp_init_xmit_timers(sk);
1959 tcp_prequeue_init(tp);
1961 tp->rto = TCP_TIMEOUT_INIT;
1962 tp->mdev = TCP_TIMEOUT_INIT;
1964 /* So many TCP implementations out there (incorrectly) count the
1965 * initial SYN frame in their delayed-ACK and congestion control
1966 * algorithms that we must have the following bandaid to talk
1967 * efficiently to them. -DaveM
1971 /* See draft-stevens-tcpca-spec-01 for discussion of the
1972 * initialization of these values.
1974 tp->snd_ssthresh = 0x7fffffff;
1975 tp->snd_cwnd_clamp = ~0;
1976 tp->mss_cache_std = tp->mss_cache = 536;
1978 tp->reordering = sysctl_tcp_reordering;
1980 sk->sk_state = TCP_CLOSE;
1982 tp->af_specific = &ipv6_specific;
1984 sk->sk_write_space = sk_stream_write_space;
1985 sk->sk_use_write_queue = 1;
1987 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1988 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1990 atomic_inc(&tcp_sockets_allocated);
1995 static int tcp_v6_destroy_sock(struct sock *sk)
1997 extern int tcp_v4_destroy_sock(struct sock *sk);
1999 tcp_v4_destroy_sock(sk);
2000 return inet6_destroy_sock(sk);
2003 /* Proc filesystem TCPv6 sock list dumping. */
2004 static void get_openreq6(struct seq_file *seq,
2005 struct sock *sk, struct open_request *req, int i, int uid)
2007 struct in6_addr *dest, *src;
2008 int ttd = req->expires - jiffies;
2013 src = &req->af.v6_req.loc_addr;
2014 dest = &req->af.v6_req.rmt_addr;
2016 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2017 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2019 src->s6_addr32[0], src->s6_addr32[1],
2020 src->s6_addr32[2], src->s6_addr32[3],
2021 ntohs(inet_sk(sk)->sport),
2022 dest->s6_addr32[0], dest->s6_addr32[1],
2023 dest->s6_addr32[2], dest->s6_addr32[3],
2024 ntohs(req->rmt_port),
2026 0,0, /* could print option size, but that is af dependent. */
2027 1, /* timers active (only the expire timer) */
2028 jiffies_to_clock_t(ttd),
2031 0, /* non standard timer */
2032 0, /* open_requests have no inode */
2036 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2038 struct in6_addr *dest, *src;
2041 unsigned long timer_expires;
2042 struct inet_opt *inet = inet_sk(sp);
2043 struct tcp_opt *tp = tcp_sk(sp);
2044 struct ipv6_pinfo *np = inet6_sk(sp);
2047 src = &np->rcv_saddr;
2048 destp = ntohs(inet->dport);
2049 srcp = ntohs(inet->sport);
2050 if (tp->pending == TCP_TIME_RETRANS) {
2052 timer_expires = tp->timeout;
2053 } else if (tp->pending == TCP_TIME_PROBE0) {
2055 timer_expires = tp->timeout;
2056 } else if (timer_pending(&sp->sk_timer)) {
2058 timer_expires = sp->sk_timer.expires;
2061 timer_expires = jiffies;
2065 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2066 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2068 src->s6_addr32[0], src->s6_addr32[1],
2069 src->s6_addr32[2], src->s6_addr32[3], srcp,
2070 dest->s6_addr32[0], dest->s6_addr32[1],
2071 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2073 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2075 jiffies_to_clock_t(timer_expires - jiffies),
2080 atomic_read(&sp->sk_refcnt), sp,
2081 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2082 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2086 static void get_timewait6_sock(struct seq_file *seq,
2087 struct tcp_tw_bucket *tw, int i)
2089 struct in6_addr *dest, *src;
2091 int ttd = tw->tw_ttd - jiffies;
2096 dest = &tw->tw_v6_daddr;
2097 src = &tw->tw_v6_rcv_saddr;
2098 destp = ntohs(tw->tw_dport);
2099 srcp = ntohs(tw->tw_sport);
2102 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2103 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2105 src->s6_addr32[0], src->s6_addr32[1],
2106 src->s6_addr32[2], src->s6_addr32[3], srcp,
2107 dest->s6_addr32[0], dest->s6_addr32[1],
2108 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2109 tw->tw_substate, 0, 0,
2110 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2111 atomic_read(&tw->tw_refcnt), tw);
2114 #ifdef CONFIG_PROC_FS
2115 static int tcp6_seq_show(struct seq_file *seq, void *v)
2117 struct tcp_iter_state *st;
2119 if (v == SEQ_START_TOKEN) {
2124 "st tx_queue rx_queue tr tm->when retrnsmt"
2125 " uid timeout inode\n");
2130 switch (st->state) {
2131 case TCP_SEQ_STATE_LISTENING:
2132 case TCP_SEQ_STATE_ESTABLISHED:
2133 get_tcp6_sock(seq, v, st->num);
2135 case TCP_SEQ_STATE_OPENREQ:
2136 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2138 case TCP_SEQ_STATE_TIME_WAIT:
2139 get_timewait6_sock(seq, v, st->num);
2146 static struct file_operations tcp6_seq_fops;
2147 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2148 .owner = THIS_MODULE,
2151 .seq_show = tcp6_seq_show,
2152 .seq_fops = &tcp6_seq_fops,
2155 int __init tcp6_proc_init(void)
2157 return tcp_proc_register(&tcp6_seq_afinfo);
2160 void tcp6_proc_exit(void)
2162 tcp_proc_unregister(&tcp6_seq_afinfo);
2166 struct proto tcpv6_prot = {
2168 .owner = THIS_MODULE,
2170 .connect = tcp_v6_connect,
2171 .disconnect = tcp_disconnect,
2172 .accept = tcp_accept,
2174 .init = tcp_v6_init_sock,
2175 .destroy = tcp_v6_destroy_sock,
2176 .shutdown = tcp_shutdown,
2177 .setsockopt = tcp_setsockopt,
2178 .getsockopt = tcp_getsockopt,
2179 .sendmsg = tcp_sendmsg,
2180 .recvmsg = tcp_recvmsg,
2181 .backlog_rcv = tcp_v6_do_rcv,
2182 .hash = tcp_v6_hash,
2183 .unhash = tcp_unhash,
2184 .get_port = tcp_v6_get_port,
2185 .enter_memory_pressure = tcp_enter_memory_pressure,
2186 .sockets_allocated = &tcp_sockets_allocated,
2187 .memory_allocated = &tcp_memory_allocated,
2188 .memory_pressure = &tcp_memory_pressure,
2189 .sysctl_mem = sysctl_tcp_mem,
2190 .sysctl_wmem = sysctl_tcp_wmem,
2191 .sysctl_rmem = sysctl_tcp_rmem,
2192 .max_header = MAX_TCP_HEADER,
2193 .slab_obj_size = sizeof(struct tcp6_sock),
2196 static struct inet6_protocol tcpv6_protocol = {
2197 .handler = tcp_v6_rcv,
2198 .err_handler = tcp_v6_err,
2199 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2202 extern struct proto_ops inet6_stream_ops;
2204 static struct inet_protosw tcpv6_protosw = {
2205 .type = SOCK_STREAM,
2206 .protocol = IPPROTO_TCP,
2207 .prot = &tcpv6_prot,
2208 .ops = &inet6_stream_ops,
2211 .flags = INET_PROTOSW_PERMANENT,
2214 void __init tcpv6_init(void)
2216 /* register inet6 protocol */
2217 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2218 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2219 inet6_register_protosw(&tcpv6_protosw);