3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
/* Forward declarations — definitions appear later in this file. */
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
/* NOTE(review): the continuation line of this prototype is missing from this extract. */
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
/* Per-socket operation tables: ipv6_mapped is installed for v4-mapped
 * destinations in tcp_v6_connect(); ipv6_specific for native IPv6. */
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
/* Bind @sk to local port @snum; when snum == 0, pick an ephemeral port
 * from sysctl_local_port_range using the global rover.  Returns 0 on
 * success.  NOTE(review): several original source lines are missing from
 * this extract (opening braces, the snum==0/else split, goto labels and
 * the failure path), so the listing below is not contiguous.
 */
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
/* Ephemeral search: walk the rover over [low, high] under tcp_portalloc_lock. */
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 if (tcp_port_rover < low)
145 rover = tcp_port_rover;
149 head = &tcp_bhash[tcp_bhashfn(rover)];
150 spin_lock(&head->lock);
151 tb_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_port_rover = rover;
159 spin_unlock(&tcp_portalloc_lock);
161 /* Exhausted local port range during search? */
166 /* OK, here is the one we will use. */
/* Explicit-port path: look the port up in the bind hash. */
169 head = &tcp_bhash[tcp_bhashfn(snum)];
170 spin_lock(&head->lock);
171 tb_for_each(tb, node, &head->chain)
172 if (tb->port == snum)
/* Port already owned: fastreuse short-circuits the conflict walk. */
178 if (tb && !hlist_empty(&tb->owners)) {
179 if (tb->fastreuse > 0 && sk->sk_reuse &&
180 sk->sk_state != TCP_LISTEN) {
184 if (tcp_v6_bind_conflict(sk, tb))
190 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
192 if (hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
197 } else if (tb->fastreuse &&
198 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
202 if (!tcp_sk(sk)->bind_hash)
203 tcp_bind_hash(sk, tb, snum);
204 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
208 spin_unlock(&head->lock);
214 static __inline__ void __tcp_v6_hash(struct sock *sk)
216 struct hlist_head *list;
219 BUG_TRAP(sk_unhashed(sk));
221 if (sk->sk_state == TCP_LISTEN) {
222 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock;
226 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
227 list = &tcp_ehash[sk->sk_hashent].chain;
228 lock = &tcp_ehash[sk->sk_hashent].lock;
232 __sk_add_node(sk, list);
233 sock_prot_inc_use(sk->sk_prot);
238 static void tcp_v6_hash(struct sock *sk)
240 if (sk->sk_state != TCP_CLOSE) {
241 struct tcp_sock *tp = tcp_sk(sk);
243 if (tp->af_specific == &ipv6_mapped) {
253 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
256 struct hlist_node *node;
257 struct sock *result = NULL;
261 read_lock(&tcp_lhash_lock);
262 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
263 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
264 struct ipv6_pinfo *np = inet6_sk(sk);
267 if (!ipv6_addr_any(&np->rcv_saddr)) {
268 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
272 if (sk->sk_bound_dev_if) {
273 if (sk->sk_bound_dev_if != dif)
281 if (score > hiscore) {
289 read_unlock(&tcp_lhash_lock);
293 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
296 * The sockhash lock must be held as a reader here.
299 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
303 struct tcp_ehash_bucket *head;
305 struct hlist_node *node;
306 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
309 /* Optimize here for direct hit, only listening connections can
310 * have wildcards anyways.
312 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
313 head = &tcp_ehash[hash];
314 read_lock(&head->lock);
315 sk_for_each(sk, node, &head->chain) {
316 /* For IPV6 do the cheaper port and family tests first. */
317 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
318 goto hit; /* You sunk my battleship! */
320 /* Must check for a TIME_WAIT'er before going to listener hash. */
321 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
322 /* FIXME: acme: check this... */
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
325 if(*((__u32 *)&(tw->tw_dport)) == ports &&
326 sk->sk_family == PF_INET6) {
327 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
328 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
329 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
333 read_unlock(&head->lock);
338 read_unlock(&head->lock);
343 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
344 struct in6_addr *daddr, u16 hnum,
349 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
354 return tcp_v6_lookup_listener(daddr, hnum, dif);
357 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
358 struct in6_addr *daddr, u16 dport,
364 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
370 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
374 * Open request hash tables.
377 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
381 a = raddr->s6_addr32[0];
382 b = raddr->s6_addr32[1];
383 c = raddr->s6_addr32[2];
385 a += JHASH_GOLDEN_RATIO;
386 b += JHASH_GOLDEN_RATIO;
388 __jhash_mix(a, b, c);
390 a += raddr->s6_addr32[3];
392 __jhash_mix(a, b, c);
394 return c & (TCP_SYNQ_HSIZE - 1);
397 static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
398 struct open_request ***prevp,
400 struct in6_addr *raddr,
401 struct in6_addr *laddr,
404 struct tcp_listen_opt *lopt = tp->listen_opt;
405 struct open_request *req, **prev;
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL;
409 prev = &req->dl_next) {
410 if (req->rmt_port == rport &&
411 req->class->family == AF_INET6 &&
412 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
413 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
414 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
415 BUG_TRAP(req->sk == NULL);
424 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
425 struct in6_addr *saddr,
426 struct in6_addr *daddr,
429 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
432 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
434 if (skb->protocol == htons(ETH_P_IPV6)) {
435 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
436 skb->nh.ipv6h->saddr.s6_addr32,
440 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Check whether the four-tuple the connecting socket @sk would use is
 * unique in the established/TIME-WAIT tables, optionally recycling a
 * TIME-WAIT bucket (returned via @twp).  On success the socket is hashed
 * in under the bucket's write lock.  NOTE(review): several original
 * lines (gotos, label bodies, the unique: tail) are missing from this
 * extract — the listing below is not contiguous.
 */
447 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
448 struct tcp_tw_bucket **twp)
450 struct inet_sock *inet = inet_sk(sk);
451 struct ipv6_pinfo *np = inet6_sk(sk);
452 struct in6_addr *daddr = &np->rcv_saddr;
453 struct in6_addr *saddr = &np->daddr;
454 int dif = sk->sk_bound_dev_if;
455 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
456 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
457 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
459 struct hlist_node *node;
460 struct tcp_tw_bucket *tw;
462 write_lock(&head->lock);
464 /* Check TIME-WAIT sockets first. */
465 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
466 tw = (struct tcp_tw_bucket*)sk2;
468 if(*((__u32 *)&(tw->tw_dport)) == ports &&
469 sk2->sk_family == PF_INET6 &&
470 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
471 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
472 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
473 struct tcp_sock *tp = tcp_sk(sk);
/* TIME-WAIT recycle: allowed when timestamps were seen and either the
 * caller accepts a tw bucket (twp) or sysctl_tcp_tw_reuse permits it. */
475 if (tw->tw_ts_recent_stamp &&
476 (!twp || (sysctl_tcp_tw_reuse &&
478 tw->tw_ts_recent_stamp > 1))) {
479 /* See comment in tcp_ipv4.c */
480 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
483 tp->rx_opt.ts_recent = tw->tw_ts_recent;
484 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
493 /* And established part... */
494 sk_for_each(sk2, node, &head->chain) {
495 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
/* Tuple is unique: hash the socket in while still holding the lock. */
500 BUG_TRAP(sk_unhashed(sk));
501 __sk_add_node(sk, &head->chain);
502 sk->sk_hashent = hash;
503 sock_prot_inc_use(sk->sk_prot);
504 write_unlock(&head->lock);
508 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
510 /* Silly. Should hash-dance instead... */
511 tcp_tw_deschedule(tw);
512 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
519 write_unlock(&head->lock);
520 return -EADDRNOTAVAIL;
523 static inline u32 tcpv6_port_offset(const struct sock *sk)
525 const struct inet_sock *inet = inet_sk(sk);
526 const struct ipv6_pinfo *np = inet6_sk(sk);
528 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/* Bind-and-hash for an outgoing connection.  With no local port yet
 * (snum == 0) it probes the ephemeral range, using
 * __tcp_v6_check_established() to validate candidate four-tuples and
 * possibly recycling a TIME-WAIT bucket; otherwise it fast-paths a port
 * this socket already exclusively owns.  NOTE(review): multiple original
 * lines (braces, gotos, labels, the ok:/out: tails) are missing from
 * this extract — the listing below is not contiguous.
 */
533 static int tcp_v6_hash_connect(struct sock *sk)
535 unsigned short snum = inet_sk(sk)->num;
536 struct tcp_bind_hashbucket *head;
537 struct tcp_bind_bucket *tb;
/* Ephemeral-port search path (snum == 0). */
541 int low = sysctl_local_port_range[0];
542 int high = sysctl_local_port_range[1];
543 int range = high - low;
547 u32 offset = hint + tcpv6_port_offset(sk);
548 struct hlist_node *node;
549 struct tcp_tw_bucket *tw = NULL;
552 for (i = 1; i <= range; i++) {
553 port = low + (i + offset) % range;
554 head = &tcp_bhash[tcp_bhashfn(port)];
555 spin_lock(&head->lock);
557 /* Does not bother with rcv_saddr checks,
558 * because the established check is already
561 tb_for_each(tb, node, &head->chain) {
562 if (tb->port == port) {
563 BUG_TRAP(!hlist_empty(&tb->owners));
564 if (tb->fastreuse >= 0)
566 if (!__tcp_v6_check_established(sk,
574 tb = tcp_bucket_create(head, port);
576 spin_unlock(&head->lock);
583 spin_unlock(&head->lock);
587 return -EADDRNOTAVAIL;
592 /* Head lock still held and bh's disabled */
593 tcp_bind_hash(sk, tb, port);
594 if (sk_unhashed(sk)) {
595 inet_sk(sk)->sport = htons(port);
598 spin_unlock(&head->lock);
/* A displaced TIME-WAIT bucket is descheduled outside the bucket lock. */
601 tcp_tw_deschedule(tw);
/* Pre-bound path (snum != 0): sole owner can skip the established walk. */
609 head = &tcp_bhash[tcp_bhashfn(snum)];
610 tb = tcp_sk(sk)->bind_hash;
611 spin_lock_bh(&head->lock);
613 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
615 spin_unlock_bh(&head->lock);
618 spin_unlock(&head->lock);
619 /* No definite answer... Walk to established hash table */
620 ret = __tcp_v6_check_established(sk, snum, NULL);
627 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
629 return IP6CB(skb)->iif;
/* connect() handler for TCP over IPv6.  Validates the destination
 * (flow label, scope id, address type), detours v4-mapped destinations
 * through tcp_v4_connect() with the ipv6_mapped operations table, then
 * routes the flow, binds/hashes via tcp_v6_hash_connect() and sends the
 * SYN.  NOTE(review): many original lines (braces, error labels, some
 * assignments) are missing from this extract — the listing below is not
 * contiguous.
 */
632 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
635 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
636 struct inet_sock *inet = inet_sk(sk);
637 struct ipv6_pinfo *np = inet6_sk(sk);
638 struct tcp_sock *tp = tcp_sk(sk);
639 struct in6_addr *saddr = NULL, *final_p = NULL, final;
641 struct dst_entry *dst;
645 if (addr_len < SIN6_LEN_RFC2133)
648 if (usin->sin6_family != AF_INET6)
649 return(-EAFNOSUPPORT);
651 memset(&fl, 0, sizeof(fl));
/* Honour a requested flow label; it must name a known flow for this sk. */
654 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
655 IP6_ECN_flow_init(fl.fl6_flowlabel);
656 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
657 struct ip6_flowlabel *flowlabel;
658 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
659 if (flowlabel == NULL)
661 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
662 fl6_sock_release(flowlabel);
667 * connect() to INADDR_ANY means loopback (BSD'ism).
670 if(ipv6_addr_any(&usin->sin6_addr))
671 usin->sin6_addr.s6_addr[15] = 0x1;
673 addr_type = ipv6_addr_type(&usin->sin6_addr);
675 if(addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a consistent, known interface. */
678 if (addr_type&IPV6_ADDR_LINKLOCAL) {
679 if (addr_len >= sizeof(struct sockaddr_in6) &&
680 usin->sin6_scope_id) {
681 /* If interface is set while binding, indices
684 if (sk->sk_bound_dev_if &&
685 sk->sk_bound_dev_if != usin->sin6_scope_id)
688 sk->sk_bound_dev_if = usin->sin6_scope_id;
691 /* Connect to link-local address requires an interface */
692 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer invalidates cached timestamps. */
696 if (tp->rx_opt.ts_recent_stamp &&
697 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
698 tp->rx_opt.ts_recent = 0;
699 tp->rx_opt.ts_recent_stamp = 0;
703 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
704 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: hand off to the IPv4 connect path. */
710 if (addr_type == IPV6_ADDR_MAPPED) {
711 u32 exthdrlen = tp->ext_header_len;
712 struct sockaddr_in sin;
714 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
716 if (__ipv6_only_sock(sk))
719 sin.sin_family = AF_INET;
720 sin.sin_port = usin->sin6_port;
721 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
723 tp->af_specific = &ipv6_mapped;
724 sk->sk_backlog_rcv = tcp_v4_do_rcv;
726 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* On failure, restore the native IPv6 operations. */
729 tp->ext_header_len = exthdrlen;
730 tp->af_specific = &ipv6_specific;
731 sk->sk_backlog_rcv = tcp_v6_do_rcv;
734 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
736 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
743 if (!ipv6_addr_any(&np->rcv_saddr))
744 saddr = &np->rcv_saddr;
746 fl.proto = IPPROTO_TCP;
747 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
748 ipv6_addr_copy(&fl.fl6_src,
749 (saddr ? saddr : &np->saddr));
750 fl.oif = sk->sk_bound_dev_if;
751 fl.fl_ip_dport = usin->sin6_port;
752 fl.fl_ip_sport = inet->sport;
/* With a routing header, route via the first hop and restore the real
 * destination after the lookup. */
754 if (np->opt && np->opt->srcrt) {
755 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
756 ipv6_addr_copy(&final, &fl.fl6_dst);
757 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
761 err = ip6_dst_lookup(sk, &dst, &fl);
765 ipv6_addr_copy(&fl.fl6_dst, final_p);
767 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
774 ipv6_addr_copy(&np->rcv_saddr, saddr);
777 /* set the source address */
778 ipv6_addr_copy(&np->saddr, saddr);
779 inet->rcv_saddr = LOOPBACK4_IPV6;
781 ip6_dst_store(sk, dst, NULL);
782 sk->sk_route_caps = dst->dev->features &
783 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
785 tp->ext_header_len = 0;
787 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
789 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
791 inet->dport = usin->sin6_port;
793 tcp_set_state(sk, TCP_SYN_SENT);
794 err = tcp_v6_hash_connect(sk);
799 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
804 err = tcp_connect(sk);
/* Failure path: tear the half-set-up connection back down. */
811 tcp_set_state(sk, TCP_CLOSE);
815 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.  Locates the socket for the offending
 * header, handles PMTU discovery (PKT_TOOBIG) by re-routing and calling
 * tcp_sync_mss(), and delivers other errors to the socket or to a
 * matching pending open_request.  NOTE(review): numerous original lines
 * (lock/unlock, braces, case labels, out: tail) are missing from this
 * extract — the listing below is not contiguous.
 */
819 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
820 int type, int code, int offset, __u32 info)
822 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
823 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
824 struct ipv6_pinfo *np;
830 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
833 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
837 if (sk->sk_state == TCP_TIME_WAIT) {
838 tcp_tw_put((struct tcp_tw_bucket*)sk);
843 if (sock_owned_by_user(sk))
844 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
846 if (sk->sk_state == TCP_CLOSE)
/* Drop errors whose echoed sequence falls outside the send window. */
850 seq = ntohl(th->seq);
851 if (sk->sk_state != TCP_LISTEN &&
852 !between(seq, tp->snd_una, tp->snd_nxt)) {
853 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
859 if (type == ICMPV6_PKT_TOOBIG) {
860 struct dst_entry *dst = NULL;
862 if (sock_owned_by_user(sk))
864 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
867 /* icmp should have updated the destination cache entry */
868 dst = __sk_dst_check(sk, np->dst_cookie);
871 struct inet_sock *inet = inet_sk(sk);
874 /* BUGGG_FUTURE: Again, it is not clear how
875 to handle rthdr case. Ignore this complexity
878 memset(&fl, 0, sizeof(fl));
879 fl.proto = IPPROTO_TCP;
880 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
881 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
882 fl.oif = sk->sk_bound_dev_if;
883 fl.fl_ip_dport = inet->dport;
884 fl.fl_ip_sport = inet->sport;
886 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
887 sk->sk_err_soft = -err;
891 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
892 sk->sk_err_soft = -err;
/* Shrink the MSS only when the cached PMTU really got smaller. */
899 if (tp->pmtu_cookie > dst_mtu(dst)) {
900 tcp_sync_mss(sk, dst_mtu(dst));
901 tcp_simple_retransmit(sk);
902 } /* else let the usual retransmit timer handle it */
907 icmpv6_err_convert(type, code, &err);
909 /* Might be for an open_request */
910 switch (sk->sk_state) {
911 struct open_request *req, **prev;
913 if (sock_owned_by_user(sk))
916 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
917 &hdr->saddr, tcp_v6_iif(skb));
921 /* ICMPs are not backlogged, hence we cannot get
922 * an established socket here.
924 BUG_TRAP(req->sk == NULL);
926 if (seq != req->snt_isn) {
927 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
931 tcp_synq_drop(sk, req, prev);
935 case TCP_SYN_RECV: /* Cannot happen.
936 It can, it SYNs are crossed. --ANK */
937 if (!sock_owned_by_user(sk)) {
938 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
940 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
944 sk->sk_err_soft = err;
948 if (!sock_owned_by_user(sk) && np->recverr) {
950 sk->sk_error_report(sk);
952 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req, routing the reply (honouring an
 * inverted routing header from the SYN's options when requested) and
 * checksumming the segment before ip6_xmit().  NOTE(review): several
 * original lines (braces, error labels, the done: tail) are missing
 * from this extract — the listing below is not contiguous.
 */
960 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
961 struct dst_entry *dst)
963 struct ipv6_pinfo *np = inet6_sk(sk);
964 struct sk_buff * skb;
965 struct ipv6_txoptions *opt = NULL;
966 struct in6_addr * final_p = NULL, final;
970 memset(&fl, 0, sizeof(fl));
971 fl.proto = IPPROTO_TCP;
972 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
973 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
974 fl.fl6_flowlabel = 0;
975 fl.oif = req->af.v6_req.iif;
976 fl.fl_ip_dport = req->rmt_port;
977 fl.fl_ip_sport = inet_sk(sk)->sport;
/* Reverse the source route carried by the SYN when the listener asked
 * for it (srcrt == 2). */
982 np->rxopt.bits.srcrt == 2 &&
983 req->af.v6_req.pktopts) {
984 struct sk_buff *pktopts = req->af.v6_req.pktopts;
985 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
987 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
990 if (opt && opt->srcrt) {
991 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
992 ipv6_addr_copy(&final, &fl.fl6_dst);
993 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
997 err = ip6_dst_lookup(sk, &dst, &fl);
1001 ipv6_addr_copy(&fl.fl6_dst, final_p);
1002 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1006 skb = tcp_make_synack(sk, dst, req);
1008 struct tcphdr *th = skb->h.th;
1010 th->check = tcp_v6_check(th, skb->len,
1011 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
1012 csum_partial((char *)th, skb->len, skb->csum));
1014 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1015 err = ip6_xmit(sk, skb, &fl, opt, 0);
1016 if (err == NET_XMIT_CN)
/* Only free an options blob we built here, never the socket's own. */
1022 if (opt && opt != np->opt)
1023 sock_kfree_s(sk, opt, opt->tot_len);
1027 static void tcp_v6_or_free(struct open_request *req)
1029 if (req->af.v6_req.pktopts)
1030 kfree_skb(req->af.v6_req.pktopts);
1033 static struct or_calltable or_ipv6 = {
1035 .rtx_syn_ack = tcp_v6_send_synack,
1036 .send_ack = tcp_v6_or_send_ack,
1037 .destructor = tcp_v6_or_free,
1038 .send_reset = tcp_v6_send_reset
1041 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1043 struct ipv6_pinfo *np = inet6_sk(sk);
1044 struct inet6_skb_parm *opt = IP6CB(skb);
1046 if (np->rxopt.all) {
1047 if ((opt->hop && np->rxopt.bits.hopopts) ||
1048 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1049 np->rxopt.bits.rxflow) ||
1050 (opt->srcrt && np->rxopt.bits.srcrt) ||
1051 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1058 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1059 struct sk_buff *skb)
1061 struct ipv6_pinfo *np = inet6_sk(sk);
1063 if (skb->ip_summed == CHECKSUM_HW) {
1064 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1065 skb->csum = offsetof(struct tcphdr, check);
1067 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1068 csum_partial((char *)th, th->doff<<2,
/* Send a RST in response to @skb (no local socket involved).  The reply
 * swaps ports/addresses, mirrors ACK/SEQ per RFC 793, and is routed and
 * transmitted with a NULL socket.  NOTE(review): several original lines
 * (braces, th->rst/ack assignments, the failure tail) are missing from
 * this extract — the listing below is not contiguous.
 */
1074 static void tcp_v6_send_reset(struct sk_buff *skb)
1076 struct tcphdr *th = skb->h.th, *t1;
1077 struct sk_buff *buff;
/* Never reset in reply to a RST, and only to unicast destinations. */
1083 if (!ipv6_unicast_destination(skb))
1087 * We need to grab some memory, and put together an RST,
1088 * and then put it into the queue to be sent.
1091 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1096 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1098 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1100 /* Swap the send and the receive. */
1101 memset(t1, 0, sizeof(*t1));
1102 t1->dest = th->source;
1103 t1->source = th->dest;
1104 t1->doff = sizeof(*t1)/4;
1108 t1->seq = th->ack_seq;
1111 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1112 + skb->len - (th->doff<<2));
1115 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1117 memset(&fl, 0, sizeof(fl));
1118 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1119 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1121 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1122 sizeof(*t1), IPPROTO_TCP,
1125 fl.proto = IPPROTO_TCP;
1126 fl.oif = tcp_v6_iif(skb);
1127 fl.fl_ip_dport = t1->dest;
1128 fl.fl_ip_sport = t1->source;
1130 /* sk = NULL, but it is safe for now. RST socket required. */
1131 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1133 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1134 dst_release(buff->dst);
1138 ip6_xmit(NULL, buff, &fl, NULL, 0);
1139 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1140 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Send a bare ACK (used for TIME-WAIT and open_request replies) with the
 * given SEQ/ACK/window and, when @ts is nonzero, a timestamp option.
 * Built and routed like tcp_v6_send_reset(), with a NULL socket.
 * NOTE(review): several original lines (braces, the ts-option sizing,
 * the tsecr word, the failure tail) are missing from this extract — the
 * listing below is not contiguous.
 */
1147 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1149 struct tcphdr *th = skb->h.th, *t1;
1150 struct sk_buff *buff;
1152 int tot_len = sizeof(struct tcphdr);
1157 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1162 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1164 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1166 /* Swap the send and the receive. */
1167 memset(t1, 0, sizeof(*t1));
1168 t1->dest = th->source;
1169 t1->source = th->dest;
1170 t1->doff = tot_len/4;
1171 t1->seq = htonl(seq);
1172 t1->ack_seq = htonl(ack);
1174 t1->window = htons(win);
/* Append a TCP timestamp option when @ts is set. */
1177 u32 *ptr = (u32*)(t1 + 1);
1178 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1179 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1180 *ptr++ = htonl(tcp_time_stamp);
1184 buff->csum = csum_partial((char *)t1, tot_len, 0);
1186 memset(&fl, 0, sizeof(fl));
1187 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1188 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1190 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1191 tot_len, IPPROTO_TCP,
1194 fl.proto = IPPROTO_TCP;
1195 fl.oif = tcp_v6_iif(skb);
1196 fl.fl_ip_dport = t1->dest;
1197 fl.fl_ip_sport = t1->source;
1199 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1200 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1201 dst_release(buff->dst);
1204 ip6_xmit(NULL, buff, &fl, NULL, 0);
1205 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1212 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1214 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1216 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1217 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1222 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1224 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
1228 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1230 struct open_request *req, **prev;
1231 struct tcphdr *th = skb->h.th;
1232 struct tcp_sock *tp = tcp_sk(sk);
1235 /* Find possible connection requests. */
1236 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1237 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1239 return tcp_check_req(sk, skb, req, prev);
1241 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1243 &skb->nh.ipv6h->daddr,
1248 if (nsk->sk_state != TCP_TIME_WAIT) {
1252 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1256 #if 0 /*def CONFIG_SYN_COOKIES*/
1257 if (!th->rst && !th->syn && th->ack)
1258 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1263 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1265 struct tcp_sock *tp = tcp_sk(sk);
1266 struct tcp_listen_opt *lopt = tp->listen_opt;
1267 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1270 req->expires = jiffies + TCP_TIMEOUT_INIT;
1272 req->dl_next = lopt->syn_table[h];
1274 write_lock(&tp->syn_wait_lock);
1275 lopt->syn_table[h] = req;
1276 write_unlock(&tp->syn_wait_lock);
/* Handle an incoming SYN on a listening socket: allocate and fill an
 * open_request, pick the ISN and send the SYN-ACK.  v4 packets are
 * punted to tcp_v4_conn_request().  NOTE(review): several original lines
 * (braces, drop labels, the success return) are missing from this
 * extract — the listing below is not contiguous.
 */
1282 /* FIXME: this is substantially similar to the ipv4 code.
1283 * Can some kind of merge be done? -- erics
1285 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1287 struct ipv6_pinfo *np = inet6_sk(sk);
1288 struct tcp_options_received tmp_opt;
1289 struct tcp_sock *tp = tcp_sk(sk);
1290 struct open_request *req = NULL;
1291 __u32 isn = TCP_SKB_CB(skb)->when;
1293 if (skb->protocol == htons(ETH_P_IP))
1294 return tcp_v4_conn_request(sk, skb);
1296 if (!ipv6_unicast_destination(skb))
1301 * There are no SYN attacks on IPv6, yet...
1303 if (tcp_synq_is_full(sk) && !isn) {
1304 if (net_ratelimit())
1305 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1309 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1313 req = tcp_openreq_alloc();
1317 tcp_clear_options(&tmp_opt);
1318 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1319 tmp_opt.user_mss = tp->rx_opt.user_mss;
1321 tcp_parse_options(skb, &tmp_opt, 0);
1323 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1324 tcp_openreq_init(req, &tmp_opt, skb);
1326 req->class = &or_ipv6;
1327 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1328 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1329 TCP_ECN_create_request(req, skb->h.th);
1330 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive when the listener wants its ancillary data. */
1331 if (ipv6_opt_accepted(sk, skb) ||
1332 np->rxopt.bits.rxinfo ||
1333 np->rxopt.bits.rxhlim) {
1334 atomic_inc(&skb->users);
1335 req->af.v6_req.pktopts = skb;
1337 req->af.v6_req.iif = sk->sk_bound_dev_if;
1339 /* So that link locals have meaning */
1340 if (!sk->sk_bound_dev_if &&
1341 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1342 req->af.v6_req.iif = tcp_v6_iif(skb);
1345 isn = tcp_v6_init_sequence(sk,skb);
1349 if (tcp_v6_send_synack(sk, req, NULL))
1352 tcp_v6_synq_add(sk, req);
/* Failure path: release the request and count the failed attempt. */
1358 tcp_openreq_free(req);
1360 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1361 return 0; /* don't send reset */
/* Create the child socket once the three-way handshake completes.  For
 * v4 packets, wrap tcp_v4_syn_recv_sock()'s child with the v4-mapped
 * IPv6 operations; for native IPv6, route the flow, clone the listener's
 * state, copy addresses/options from the open_request and hash the child
 * in.  NOTE(review): many original lines (braces, returns, out/overflow
 * labels) are missing from this extract — the listing below is not
 * contiguous.
 */
1364 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1365 struct open_request *req,
1366 struct dst_entry *dst)
1368 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1369 struct tcp6_sock *newtcp6sk;
1370 struct inet_sock *newinet;
1371 struct tcp_sock *newtp;
1373 struct ipv6_txoptions *opt;
/* --- v4-mapped branch: delegate child creation to the IPv4 code. --- */
1375 if (skb->protocol == htons(ETH_P_IP)) {
1380 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1385 newtcp6sk = (struct tcp6_sock *)newsk;
1386 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1388 newinet = inet_sk(newsk);
1389 newnp = inet6_sk(newsk);
1390 newtp = tcp_sk(newsk);
1392 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1394 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1397 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1400 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1402 newtp->af_specific = &ipv6_mapped;
1403 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1404 newnp->pktoptions = NULL;
1406 newnp->mcast_oif = tcp_v6_iif(skb);
1407 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1409 /* Charge newly allocated IPv6 socket. Though it is mapped,
1412 #ifdef INET_REFCNT_DEBUG
1413 atomic_inc(&inet6_sock_nr);
1416 /* It is tricky place. Until this moment IPv4 tcp
1417 worked with IPv6 af_tcp.af_specific.
1420 tcp_sync_mss(newsk, newtp->pmtu_cookie);
/* --- native IPv6 branch. --- */
1427 if (sk_acceptq_is_full(sk))
1430 if (np->rxopt.bits.srcrt == 2 &&
1431 opt == NULL && req->af.v6_req.pktopts) {
1432 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1434 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1438 struct in6_addr *final_p = NULL, final;
1441 memset(&fl, 0, sizeof(fl));
1442 fl.proto = IPPROTO_TCP;
1443 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1444 if (opt && opt->srcrt) {
1445 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1446 ipv6_addr_copy(&final, &fl.fl6_dst);
1447 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1450 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1451 fl.oif = sk->sk_bound_dev_if;
1452 fl.fl_ip_dport = req->rmt_port;
1453 fl.fl_ip_sport = inet_sk(sk)->sport;
1455 if (ip6_dst_lookup(sk, &dst, &fl))
1459 ipv6_addr_copy(&fl.fl6_dst, final_p);
1461 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1465 newsk = tcp_create_openreq_child(sk, req, skb);
1469 /* Charge newly allocated IPv6 socket */
1470 #ifdef INET_REFCNT_DEBUG
1471 atomic_inc(&inet6_sock_nr);
1474 ip6_dst_store(newsk, dst, NULL);
1475 newsk->sk_route_caps = dst->dev->features &
1476 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1478 newtcp6sk = (struct tcp6_sock *)newsk;
1479 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1481 newtp = tcp_sk(newsk);
1482 newinet = inet_sk(newsk);
1483 newnp = inet6_sk(newsk);
1485 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1487 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1488 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1489 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1490 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1492 /* Now IPv6 options...
1494 First: no IPv4 options.
1496 newinet->opt = NULL;
1499 newnp->rxopt.all = np->rxopt.all;
1501 /* Clone pktoptions received with SYN */
1502 newnp->pktoptions = NULL;
1503 if (req->af.v6_req.pktopts) {
1504 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1506 kfree_skb(req->af.v6_req.pktopts);
1507 req->af.v6_req.pktopts = NULL;
1508 if (newnp->pktoptions)
1509 skb_set_owner_r(newnp->pktoptions, newsk);
1512 newnp->mcast_oif = tcp_v6_iif(skb);
1513 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1515 /* Clone native IPv6 options from listening socket (if any)
1517 Yes, keeping reference count would be much more clever,
1518 but we make one more one thing there: reattach optmem
1522 newnp->opt = ipv6_dup_options(newsk, opt);
1524 sock_kfree_s(sk, opt, opt->tot_len);
1527 newtp->ext_header_len = 0;
1529 newtp->ext_header_len = newnp->opt->opt_nflen +
1530 newnp->opt->opt_flen;
1532 tcp_sync_mss(newsk, dst_mtu(dst));
1533 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1534 tcp_initialize_rcv_mss(newsk);
1536 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1538 __tcp_v6_hash(newsk);
1539 tcp_inherit_port(sk, newsk);
/* Failure paths: accept-queue overflow and allocation/routing drops. */
1544 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1546 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1547 if (opt && opt != np->opt)
1548 sock_kfree_s(sk, opt, opt->tot_len);
1553 static int tcp_v6_checksum_init(struct sk_buff *skb)
1555 if (skb->ip_summed == CHECKSUM_HW) {
1556 skb->ip_summed = CHECKSUM_UNNECESSARY;
1557 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1558 &skb->nh.ipv6h->daddr,skb->csum))
1560 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1562 if (skb->len <= 76) {
1563 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1564 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1566 skb->ip_summed = CHECKSUM_UNNECESSARY;
1568 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1569 &skb->nh.ipv6h->daddr,0);
1574 /* The socket must have it's spinlock held when we get
1577 * We have a potential double-lock case here, so even when
1578 * doing backlog processing we use the BH locking scheme.
1579 * This is because we cannot sleep with the original spinlock
1582 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1584 struct ipv6_pinfo *np = inet6_sk(sk);
1585 struct tcp_sock *tp;
1586 struct sk_buff *opt_skb = NULL;
1588 /* Imagine: socket is IPv6. IPv4 packet arrives,
1589 goes to IPv4 receive handler and backlogged.
1590 From backlog it always goes here. Kerboom...
1591 Fortunately, tcp_rcv_established and rcv_established
1592 handle them correctly, but it is not case with
1593 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1596 if (skb->protocol == htons(ETH_P_IP))
1597 return tcp_v4_do_rcv(sk, skb);
1599 if (sk_filter(sk, skb, 0))
1603 * socket locking is here for SMP purposes as backlog rcv
1604 * is currently called with bh processing disabled.
1607 /* Do Stevens' IPV6_PKTOPTIONS.
1609 Yes, guys, it is the only place in our code, where we
1610 may make it not affecting IPv4.
1611 The rest of code is protocol independent,
1612 and I do not like idea to uglify IPv4.
1614 Actually, all the idea behind IPV6_PKTOPTIONS
1615 looks not very well thought. For now we latch
1616 options, received in the last packet, enqueued
1617 by tcp. Feel free to propose better solution.
1621 opt_skb = skb_clone(skb, GFP_ATOMIC);
1623 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1624 TCP_CHECK_TIMER(sk);
1625 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1627 TCP_CHECK_TIMER(sk);
1629 goto ipv6_pktoptions;
1633 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1636 if (sk->sk_state == TCP_LISTEN) {
1637 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1642 * Queue it on the new socket if the new socket is active,
1643 * otherwise we just shortcircuit this and continue with
1647 if (tcp_child_process(sk, nsk, skb))
1650 __kfree_skb(opt_skb);
1655 TCP_CHECK_TIMER(sk);
1656 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1658 TCP_CHECK_TIMER(sk);
1660 goto ipv6_pktoptions;
1664 tcp_v6_send_reset(skb);
1667 __kfree_skb(opt_skb);
1671 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1676 /* Do you ask, what is it?
1678 1. skb was enqueued by tcp.
1679 2. skb is added to tail of read queue, rather than out of order.
1680 3. socket is not in passive state.
1681 4. Finally, it really contains options, which user wants to receive.
1684 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1685 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1686 if (np->rxopt.bits.rxinfo)
1687 np->mcast_oif = tcp_v6_iif(opt_skb);
1688 if (np->rxopt.bits.rxhlim)
1689 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1690 if (ipv6_opt_accepted(sk, opt_skb)) {
1691 skb_set_owner_r(opt_skb, sk);
1692 opt_skb = xchg(&np->pktoptions, opt_skb);
1694 __kfree_skb(opt_skb);
1695 opt_skb = xchg(&np->pktoptions, NULL);
1704 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1706 struct sk_buff *skb = *pskb;
1711 if (skb->pkt_type != PACKET_HOST)
1715 * Count it even if it's bad.
1717 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1719 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1724 if (th->doff < sizeof(struct tcphdr)/4)
1726 if (!pskb_may_pull(skb, th->doff*4))
1729 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1730 tcp_v6_checksum_init(skb) < 0))
1734 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1735 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1736 skb->len - th->doff*4);
1737 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1738 TCP_SKB_CB(skb)->when = 0;
1739 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1740 TCP_SKB_CB(skb)->sacked = 0;
1742 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1743 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1749 if (sk->sk_state == TCP_TIME_WAIT)
1752 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1753 goto discard_and_relse;
1755 if (sk_filter(sk, skb, 0))
1756 goto discard_and_relse;
1762 if (!sock_owned_by_user(sk)) {
1763 if (!tcp_prequeue(sk, skb))
1764 ret = tcp_v6_do_rcv(sk, skb);
1766 sk_add_backlog(sk, skb);
1770 return ret ? -1 : 0;
1773 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1776 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1778 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1780 tcp_v6_send_reset(skb);
1797 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1798 tcp_tw_put((struct tcp_tw_bucket *) sk);
1802 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1803 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1804 tcp_tw_put((struct tcp_tw_bucket *) sk);
1808 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1809 skb, th, skb->len)) {
1814 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1816 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1817 tcp_tw_put((struct tcp_tw_bucket *)sk);
1821 /* Fall through to ACK */
1824 tcp_v6_timewait_ack(sk, skb);
1828 case TCP_TW_SUCCESS:;
1833 static int tcp_v6_rebuild_header(struct sock *sk)
1836 struct dst_entry *dst;
1837 struct ipv6_pinfo *np = inet6_sk(sk);
1839 dst = __sk_dst_check(sk, np->dst_cookie);
1842 struct inet_sock *inet = inet_sk(sk);
1843 struct in6_addr *final_p = NULL, final;
1846 memset(&fl, 0, sizeof(fl));
1847 fl.proto = IPPROTO_TCP;
1848 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1849 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1850 fl.fl6_flowlabel = np->flow_label;
1851 fl.oif = sk->sk_bound_dev_if;
1852 fl.fl_ip_dport = inet->dport;
1853 fl.fl_ip_sport = inet->sport;
1855 if (np->opt && np->opt->srcrt) {
1856 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1857 ipv6_addr_copy(&final, &fl.fl6_dst);
1858 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1862 err = ip6_dst_lookup(sk, &dst, &fl);
1864 sk->sk_route_caps = 0;
1868 ipv6_addr_copy(&fl.fl6_dst, final_p);
1870 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1871 sk->sk_err_soft = -err;
1876 ip6_dst_store(sk, dst, NULL);
1877 sk->sk_route_caps = dst->dev->features &
1878 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1884 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1886 struct sock *sk = skb->sk;
1887 struct inet_sock *inet = inet_sk(sk);
1888 struct ipv6_pinfo *np = inet6_sk(sk);
1890 struct dst_entry *dst;
1891 struct in6_addr *final_p = NULL, final;
1893 memset(&fl, 0, sizeof(fl));
1894 fl.proto = IPPROTO_TCP;
1895 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1896 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1897 fl.fl6_flowlabel = np->flow_label;
1898 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1899 fl.oif = sk->sk_bound_dev_if;
1900 fl.fl_ip_sport = inet->sport;
1901 fl.fl_ip_dport = inet->dport;
1903 if (np->opt && np->opt->srcrt) {
1904 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1905 ipv6_addr_copy(&final, &fl.fl6_dst);
1906 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1910 dst = __sk_dst_check(sk, np->dst_cookie);
1913 int err = ip6_dst_lookup(sk, &dst, &fl);
1916 sk->sk_err_soft = -err;
1921 ipv6_addr_copy(&fl.fl6_dst, final_p);
1923 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1924 sk->sk_route_caps = 0;
1929 ip6_dst_store(sk, dst, NULL);
1930 sk->sk_route_caps = dst->dev->features &
1931 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1934 skb->dst = dst_clone(dst);
1936 /* Restore final destination back after routing done */
1937 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1939 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1942 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1944 struct ipv6_pinfo *np = inet6_sk(sk);
1945 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1947 sin6->sin6_family = AF_INET6;
1948 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1949 sin6->sin6_port = inet_sk(sk)->dport;
1950 /* We do not store received flowlabel for TCP */
1951 sin6->sin6_flowinfo = 0;
1952 sin6->sin6_scope_id = 0;
1953 if (sk->sk_bound_dev_if &&
1954 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1955 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1958 static int tcp_v6_remember_stamp(struct sock *sk)
1960 /* Alas, not yet... */
1964 static struct tcp_func ipv6_specific = {
1965 .queue_xmit = tcp_v6_xmit,
1966 .send_check = tcp_v6_send_check,
1967 .rebuild_header = tcp_v6_rebuild_header,
1968 .conn_request = tcp_v6_conn_request,
1969 .syn_recv_sock = tcp_v6_syn_recv_sock,
1970 .remember_stamp = tcp_v6_remember_stamp,
1971 .net_header_len = sizeof(struct ipv6hdr),
1973 .setsockopt = ipv6_setsockopt,
1974 .getsockopt = ipv6_getsockopt,
1975 .addr2sockaddr = v6_addr2sockaddr,
1976 .sockaddr_len = sizeof(struct sockaddr_in6)
1980 * TCP over IPv4 via INET6 API
1983 static struct tcp_func ipv6_mapped = {
1984 .queue_xmit = ip_queue_xmit,
1985 .send_check = tcp_v4_send_check,
1986 .rebuild_header = tcp_v4_rebuild_header,
1987 .conn_request = tcp_v6_conn_request,
1988 .syn_recv_sock = tcp_v6_syn_recv_sock,
1989 .remember_stamp = tcp_v4_remember_stamp,
1990 .net_header_len = sizeof(struct iphdr),
1992 .setsockopt = ipv6_setsockopt,
1993 .getsockopt = ipv6_getsockopt,
1994 .addr2sockaddr = v6_addr2sockaddr,
1995 .sockaddr_len = sizeof(struct sockaddr_in6)
2000 /* NOTE: A lot of things set to zero explicitly by call to
2001 * sk_alloc() so need not be done here.
2003 static int tcp_v6_init_sock(struct sock *sk)
2005 struct tcp_sock *tp = tcp_sk(sk);
2007 skb_queue_head_init(&tp->out_of_order_queue);
2008 tcp_init_xmit_timers(sk);
2009 tcp_prequeue_init(tp);
2011 tp->rto = TCP_TIMEOUT_INIT;
2012 tp->mdev = TCP_TIMEOUT_INIT;
2014 /* So many TCP implementations out there (incorrectly) count the
2015 * initial SYN frame in their delayed-ACK and congestion control
2016 * algorithms that we must have the following bandaid to talk
2017 * efficiently to them. -DaveM
2021 /* See draft-stevens-tcpca-spec-01 for discussion of the
2022 * initialization of these values.
2024 tp->snd_ssthresh = 0x7fffffff;
2025 tp->snd_cwnd_clamp = ~0;
2026 tp->mss_cache_std = tp->mss_cache = 536;
2028 tp->reordering = sysctl_tcp_reordering;
2030 sk->sk_state = TCP_CLOSE;
2032 tp->af_specific = &ipv6_specific;
2034 sk->sk_write_space = sk_stream_write_space;
2035 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2037 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2038 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2040 atomic_inc(&tcp_sockets_allocated);
2045 static int tcp_v6_destroy_sock(struct sock *sk)
2047 extern int tcp_v4_destroy_sock(struct sock *sk);
2049 tcp_v4_destroy_sock(sk);
2050 return inet6_destroy_sock(sk);
2053 /* Proc filesystem TCPv6 sock list dumping. */
2054 static void get_openreq6(struct seq_file *seq,
2055 struct sock *sk, struct open_request *req, int i, int uid)
2057 struct in6_addr *dest, *src;
2058 int ttd = req->expires - jiffies;
2063 src = &req->af.v6_req.loc_addr;
2064 dest = &req->af.v6_req.rmt_addr;
2066 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2067 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2069 src->s6_addr32[0], src->s6_addr32[1],
2070 src->s6_addr32[2], src->s6_addr32[3],
2071 ntohs(inet_sk(sk)->sport),
2072 dest->s6_addr32[0], dest->s6_addr32[1],
2073 dest->s6_addr32[2], dest->s6_addr32[3],
2074 ntohs(req->rmt_port),
2076 0,0, /* could print option size, but that is af dependent. */
2077 1, /* timers active (only the expire timer) */
2078 jiffies_to_clock_t(ttd),
2081 0, /* non standard timer */
2082 0, /* open_requests have no inode */
2086 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2088 struct in6_addr *dest, *src;
2091 unsigned long timer_expires;
2092 struct inet_sock *inet = inet_sk(sp);
2093 struct tcp_sock *tp = tcp_sk(sp);
2094 struct ipv6_pinfo *np = inet6_sk(sp);
2097 src = &np->rcv_saddr;
2098 destp = ntohs(inet->dport);
2099 srcp = ntohs(inet->sport);
2100 if (tp->pending == TCP_TIME_RETRANS) {
2102 timer_expires = tp->timeout;
2103 } else if (tp->pending == TCP_TIME_PROBE0) {
2105 timer_expires = tp->timeout;
2106 } else if (timer_pending(&sp->sk_timer)) {
2108 timer_expires = sp->sk_timer.expires;
2111 timer_expires = jiffies;
2115 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2116 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2118 src->s6_addr32[0], src->s6_addr32[1],
2119 src->s6_addr32[2], src->s6_addr32[3], srcp,
2120 dest->s6_addr32[0], dest->s6_addr32[1],
2121 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2123 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2125 jiffies_to_clock_t(timer_expires - jiffies),
2130 atomic_read(&sp->sk_refcnt), sp,
2131 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2132 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2136 static void get_timewait6_sock(struct seq_file *seq,
2137 struct tcp_tw_bucket *tw, int i)
2139 struct in6_addr *dest, *src;
2141 int ttd = tw->tw_ttd - jiffies;
2146 dest = &tw->tw_v6_daddr;
2147 src = &tw->tw_v6_rcv_saddr;
2148 destp = ntohs(tw->tw_dport);
2149 srcp = ntohs(tw->tw_sport);
2152 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2153 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2155 src->s6_addr32[0], src->s6_addr32[1],
2156 src->s6_addr32[2], src->s6_addr32[3], srcp,
2157 dest->s6_addr32[0], dest->s6_addr32[1],
2158 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2159 tw->tw_substate, 0, 0,
2160 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2161 atomic_read(&tw->tw_refcnt), tw);
2164 #ifdef CONFIG_PROC_FS
2165 static int tcp6_seq_show(struct seq_file *seq, void *v)
2167 struct tcp_iter_state *st;
2169 if (v == SEQ_START_TOKEN) {
2174 "st tx_queue rx_queue tr tm->when retrnsmt"
2175 " uid timeout inode\n");
2180 switch (st->state) {
2181 case TCP_SEQ_STATE_LISTENING:
2182 case TCP_SEQ_STATE_ESTABLISHED:
2183 get_tcp6_sock(seq, v, st->num);
2185 case TCP_SEQ_STATE_OPENREQ:
2186 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2188 case TCP_SEQ_STATE_TIME_WAIT:
2189 get_timewait6_sock(seq, v, st->num);
2196 static struct file_operations tcp6_seq_fops;
2197 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2198 .owner = THIS_MODULE,
2201 .seq_show = tcp6_seq_show,
2202 .seq_fops = &tcp6_seq_fops,
2205 int __init tcp6_proc_init(void)
2207 return tcp_proc_register(&tcp6_seq_afinfo);
2210 void tcp6_proc_exit(void)
2212 tcp_proc_unregister(&tcp6_seq_afinfo);
2216 struct proto tcpv6_prot = {
2218 .owner = THIS_MODULE,
2220 .connect = tcp_v6_connect,
2221 .disconnect = tcp_disconnect,
2222 .accept = tcp_accept,
2224 .init = tcp_v6_init_sock,
2225 .destroy = tcp_v6_destroy_sock,
2226 .shutdown = tcp_shutdown,
2227 .setsockopt = tcp_setsockopt,
2228 .getsockopt = tcp_getsockopt,
2229 .sendmsg = tcp_sendmsg,
2230 .recvmsg = tcp_recvmsg,
2231 .backlog_rcv = tcp_v6_do_rcv,
2232 .hash = tcp_v6_hash,
2233 .unhash = tcp_unhash,
2234 .get_port = tcp_v6_get_port,
2235 .enter_memory_pressure = tcp_enter_memory_pressure,
2236 .sockets_allocated = &tcp_sockets_allocated,
2237 .memory_allocated = &tcp_memory_allocated,
2238 .memory_pressure = &tcp_memory_pressure,
2239 .sysctl_mem = sysctl_tcp_mem,
2240 .sysctl_wmem = sysctl_tcp_wmem,
2241 .sysctl_rmem = sysctl_tcp_rmem,
2242 .max_header = MAX_TCP_HEADER,
2243 .obj_size = sizeof(struct tcp6_sock),
2246 static struct inet6_protocol tcpv6_protocol = {
2247 .handler = tcp_v6_rcv,
2248 .err_handler = tcp_v6_err,
2249 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2252 extern struct proto_ops inet6_stream_ops;
2254 static struct inet_protosw tcpv6_protosw = {
2255 .type = SOCK_STREAM,
2256 .protocol = IPPROTO_TCP,
2257 .prot = &tcpv6_prot,
2258 .ops = &inet6_stream_ops,
2261 .flags = INET_PROTOSW_PERMANENT,
2264 void __init tcpv6_init(void)
2266 /* register inet6 protocol */
2267 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2268 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2269 inet6_register_protosw(&tcpv6_protosw);