3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
61 #include <asm/uaccess.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
/* Forward declarations and per-address-family operation tables.
 * NOTE(review): fragmentary listing -- embedded original line numbers
 * skip, so some declarations/parameters are missing from this view. */
66 static void tcp_v6_send_reset(struct sk_buff *skb);
67 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
68 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
71 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
72 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74 static struct tcp_func ipv6_mapped;
75 static struct tcp_func ipv6_specific;
/* Fold local/foreign port and the low 32 bits of both IPv6 addresses
 * into an index into the established-connections hash table; the result
 * is masked by (tcp_ehash_size - 1), so tcp_ehash_size is presumably a
 * power of two -- TODO confirm against its definition.
 * NOTE(review): fragmentary listing; original lines (braces) missing. */
77 /* I have no idea if this is a good hash for v6 or not. -DaveM */
78 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
79 struct in6_addr *faddr, u16 fport)
81 int hashent = (lport ^ fport);
83 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
84 hashent ^= hashent>>16;
85 hashent ^= hashent>>8;
86 return (hashent & (tcp_ehash_size - 1));
/* Compute the established-hash index for a connected socket by feeding
 * its own (rcv_saddr, num) and peer (daddr, dport) into tcp_v6_hashfn().
 * NOTE(review): fragmentary listing; original lines (braces) missing. */
89 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91 struct inet_opt *inet = inet_sk(sk);
92 struct ipv6_pinfo *np = inet6_sk(sk);
93 struct in6_addr *laddr = &np->rcv_saddr;
94 struct in6_addr *faddr = &np->daddr;
95 __u16 lport = inet->num;
96 __u16 fport = inet->dport;
97 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/* Walk every owner of bind bucket 'tb' and test whether binding 'sk'
 * would conflict: same (or unset) bound device, SO_REUSEADDR not
 * allowing coexistence (a listener never shares), and equal rcv_saddr
 * per ipv6_rcv_saddr_equal().
 * NOTE(review): fragmentary listing -- the declaration of sk2, part of
 * the condition, and the return statements are missing from this view. */
100 static inline int tcp_v6_bind_conflict(struct sock *sk,
101 struct tcp_bind_bucket *tb)
104 struct hlist_node *node;
106 /* We must walk the whole port owner list in this case. -DaveM */
107 sk_for_each_bound(sk2, node, &tb->owners) {
109 (!sk->sk_bound_dev_if ||
110 !sk2->sk_bound_dev_if ||
111 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
112 (!sk->sk_reuse || !sk2->sk_reuse ||
113 sk2->sk_state == TCP_LISTEN) &&
114 ipv6_rcv_saddr_equal(sk, sk2))
/* Bind 'sk' to local port 'snum'.  When snum is 0 (visible in the first
 * branch's use of sysctl_local_port_range and tcp_port_rover), search
 * the local port range under tcp_portalloc_lock for a free bind bucket;
 * otherwise look up the bucket for snum directly.  An occupied bucket
 * is accepted on the fastreuse path or after tcp_v6_bind_conflict()
 * reports no conflict; finally the socket is attached to the bucket
 * via tcp_bind_hash().
 * NOTE(review): fragmentary listing -- loop heads, goto labels, the
 * success/failure returns and several braces are missing from this view,
 * so the exact control flow cannot be confirmed here. */
121 /* Grrr, addr_type already calculated by caller, but I don't want
122 * to add some silly "cookie" argument to this method just for that.
123 * But it doesn't matter, the recalculation is in the rarest path
124 * this function ever takes.
126 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128 struct tcp_bind_hashbucket *head;
129 struct tcp_bind_bucket *tb;
130 struct hlist_node *node;
135 int low = sysctl_local_port_range[0];
136 int high = sysctl_local_port_range[1];
137 int remaining = (high - low) + 1;
140 spin_lock(&tcp_portalloc_lock);
141 rover = tcp_port_rover;
143 if ((rover < low) || (rover > high))
145 head = &tcp_bhash[tcp_bhashfn(rover)];
146 spin_lock(&head->lock);
147 tb_for_each(tb, node, &head->chain)
148 if (tb->port == rover)
152 spin_unlock(&head->lock);
153 } while (--remaining > 0);
154 tcp_port_rover = rover;
155 spin_unlock(&tcp_portalloc_lock);
157 /* Exhausted local port range during search? */
162 /* OK, here is the one we will use. */
165 head = &tcp_bhash[tcp_bhashfn(snum)];
166 spin_lock(&head->lock);
167 tb_for_each(tb, node, &head->chain)
168 if (tb->port == snum)
174 if (tb && !hlist_empty(&tb->owners)) {
175 if (tb->fastreuse > 0 && sk->sk_reuse &&
176 sk->sk_state != TCP_LISTEN) {
180 if (tcp_v6_bind_conflict(sk, tb))
186 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
188 if (hlist_empty(&tb->owners)) {
189 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
193 } else if (tb->fastreuse &&
194 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
198 if (!tcp_sk(sk)->bind_hash)
199 tcp_bind_hash(sk, tb, snum);
200 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
204 spin_unlock(&head->lock);
/* Insert a (currently unhashed, per BUG_TRAP) socket into the proper
 * hash: the listening hash for TCP_LISTEN sockets, otherwise the
 * established hash chain chosen by tcp_v6_sk_hashfn().  Also bumps the
 * protocol use counter.
 * NOTE(review): fragmentary listing -- the lock declaration, the else
 * branch head and the lock/unlock calls around __sk_add_node() are
 * missing from this view. */
210 static __inline__ void __tcp_v6_hash(struct sock *sk)
212 struct hlist_head *list;
215 BUG_TRAP(sk_unhashed(sk));
217 if (sk->sk_state == TCP_LISTEN) {
218 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
219 lock = &tcp_lhash_lock;
222 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
223 list = &tcp_ehash[sk->sk_hashent].chain;
224 lock = &tcp_ehash[sk->sk_hashent].lock;
228 __sk_add_node(sk, list);
229 sock_prot_inc_use(sk->sk_prot);
/* Public hash entry point: only hashes sockets not in TCP_CLOSE; the
 * visible test against ipv6_mapped presumably dispatches v4-mapped
 * sockets to the IPv4 hash path -- TODO confirm, the branch bodies are
 * missing from this fragmentary view. */
234 static void tcp_v6_hash(struct sock *sk)
236 if (sk->sk_state != TCP_CLOSE) {
237 struct tcp_opt *tp = tcp_sk(sk);
239 if (tp->af_specific == &ipv6_mapped) {
/* Find the best listening socket for (daddr, hnum, dif): scans the
 * listening-hash chain under tcp_lhash_lock for AF_INET6 sockets bound
 * to port hnum, filtering on a non-wildcard rcv_saddr and on
 * sk_bound_dev_if, and keeping the highest-scoring match in 'result'.
 * NOTE(review): fragmentary listing -- the score computation, the
 * 'continue' paths and the final return are missing from this view. */
249 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
252 struct hlist_node *node;
253 struct sock *result = NULL;
257 read_lock(&tcp_lhash_lock);
258 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
259 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
260 struct ipv6_pinfo *np = inet6_sk(sk);
263 if (!ipv6_addr_any(&np->rcv_saddr)) {
264 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
268 if (sk->sk_bound_dev_if) {
269 if (sk->sk_bound_dev_if != dif)
277 if (score > hiscore) {
285 read_unlock(&tcp_lhash_lock);
/* Look up a non-listening socket for the 4-tuple: first the established
 * chain (exact TCP_IPV6_MATCH hit), then the TIME_WAIT chain that lives
 * at head + tcp_ehash_size, matching combined ports, family, both
 * addresses and the bound device.
 * NOTE(review): fragmentary listing -- the 'hit' label, sock_hold and
 * the final NULL return are missing from this view. */
289 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
290 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
292 * The sockhash lock must be held as a reader here.
295 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
296 struct in6_addr *daddr, u16 hnum,
299 struct tcp_ehash_bucket *head;
301 struct hlist_node *node;
302 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
305 /* Optimize here for direct hit, only listening connections can
306 * have wildcards anyways.
308 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
309 head = &tcp_ehash[hash];
310 read_lock(&head->lock);
311 sk_for_each(sk, node, &head->chain) {
312 /* For IPV6 do the cheaper port and family tests first. */
313 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
314 goto hit; /* You sunk my battleship! */
316 /* Must check for a TIME_WAIT'er before going to listener hash. */
317 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
318 /* FIXME: acme: check this... */
319 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
321 if(*((__u32 *)&(tw->tw_dport)) == ports &&
322 sk->sk_family == PF_INET6) {
323 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
324 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
325 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
329 read_unlock(&head->lock);
334 read_unlock(&head->lock);
/* Combined lookup: try the established/TIME_WAIT tables first, fall
 * back to the listener hash.
 * NOTE(review): fragmentary listing -- the non-NULL early return
 * between the two calls is missing from this view. */
339 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
340 struct in6_addr *daddr, u16 hnum,
345 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
350 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* Exported wrapper around __tcp_v6_lookup(): takes the destination
 * port in network byte order (note the ntohs(dport)).
 * NOTE(review): fragmentary listing; locking and return are missing. */
353 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
354 struct in6_addr *daddr, u16 dport,
360 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
/* Hash a SYN's (remote addr, remote port) plus per-listener random salt
 * 'rnd' into a SYN-queue slot using the jhash mixing primitive; the
 * result is masked by (TCP_SYNQ_HSIZE - 1).
 * NOTE(review): fragmentary listing -- the a/b/c declarations and the
 * mixing of rport/rnd are missing from this view. */
368 * Open request hash tables.
371 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
375 a = raddr->s6_addr32[0];
376 b = raddr->s6_addr32[1];
377 c = raddr->s6_addr32[2];
379 a += JHASH_GOLDEN_RATIO;
380 b += JHASH_GOLDEN_RATIO;
382 __jhash_mix(a, b, c);
384 a += raddr->s6_addr32[3];
386 __jhash_mix(a, b, c);
388 return c & (TCP_SYNQ_HSIZE - 1);
/* Search a listener's SYN table for a pending open_request matching
 * remote port/addr, local addr and (if set) the request's interface.
 * On a match, *prevp presumably receives the predecessor link so the
 * caller can unlink the request -- TODO confirm; the assignment and the
 * returns are missing from this fragmentary view. */
391 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
392 struct open_request ***prevp,
394 struct in6_addr *raddr,
395 struct in6_addr *laddr,
398 struct tcp_listen_opt *lopt = tp->listen_opt;
399 struct open_request *req, **prev;
401 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
402 (req = *prev) != NULL;
403 prev = &req->dl_next) {
404 if (req->rmt_port == rport &&
405 req->class->family == AF_INET6 &&
406 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
407 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
408 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
409 BUG_TRAP(req->sk == NULL);
/* TCP-over-IPv6 pseudo-header checksum helper: wraps csum_ipv6_magic()
 * with IPPROTO_TCP, folding 'base' (a partial checksum) in.
 * NOTE(review): fragmentary listing; the 'base' parameter declaration
 * line is missing from this view. */
418 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
419 struct in6_addr *saddr,
420 struct in6_addr *daddr,
423 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Pick an initial sequence number for an incoming connection: the
 * native-IPv6 path uses secure_tcpv6_sequence_number() on the packet's
 * addresses; otherwise (v4-mapped traffic on this socket, judging by
 * the nh.iph access) the IPv4 variant is used.
 * NOTE(review): fragmentary listing; port arguments and the else branch
 * head are missing from this view. */
426 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
428 if (skb->protocol == htons(ETH_P_IPV6)) {
429 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
430 skb->nh.ipv6h->saddr.s6_addr32,
434 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Connect-time uniqueness check for the chosen 4-tuple, under the
 * ehash-bucket write lock.  A matching TIME_WAIT socket with recent
 * timestamps may be recycled: write_seq is advanced past the old
 * connection's snd_nxt and ts_recent is inherited (see the tcp_ipv4.c
 * comment referenced inline), then the TW socket is descheduled.  A
 * match in the established chain means the tuple is taken and
 * -EADDRNOTAVAIL is returned; otherwise 'sk' is added to the chain.
 * NOTE(review): fragmentary listing -- the goto labels, 'unique' path
 * return, and several braces are missing from this view. */
441 static int tcp_v6_check_established(struct sock *sk)
443 struct inet_opt *inet = inet_sk(sk);
444 struct ipv6_pinfo *np = inet6_sk(sk);
445 struct in6_addr *daddr = &np->rcv_saddr;
446 struct in6_addr *saddr = &np->daddr;
447 int dif = sk->sk_bound_dev_if;
448 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
449 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
450 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
452 struct hlist_node *node;
453 struct tcp_tw_bucket *tw;
455 write_lock_bh(&head->lock);
457 /* Check TIME-WAIT sockets first. */
458 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
459 tw = (struct tcp_tw_bucket*)sk2;
461 if(*((__u32 *)&(tw->tw_dport)) == ports &&
462 sk2->sk_family == PF_INET6 &&
463 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
464 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
465 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
466 struct tcp_opt *tp = tcp_sk(sk);
468 if (tw->tw_ts_recent_stamp) {
469 /* See comment in tcp_ipv4.c */
470 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
473 tp->ts_recent = tw->tw_ts_recent;
474 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
483 /* And established part... */
484 sk_for_each(sk2, node, &head->chain) {
485 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
490 BUG_TRAP(sk_unhashed(sk));
491 __sk_add_node(sk, &head->chain);
492 sk->sk_hashent = hash;
493 sock_prot_inc_use(sk->sk_prot);
494 write_unlock_bh(&head->lock);
497 /* Silly. Should hash-dance instead... */
499 tcp_tw_deschedule(tw);
500 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
508 write_unlock_bh(&head->lock);
509 return -EADDRNOTAVAIL;
/* Hash an actively-connecting socket: allocate a local port via
 * tcp_v6_get_port() if none is bound yet, then either take the
 * fast path when 'sk' is the sole owner of its bind bucket, or run the
 * full tcp_v6_check_established() uniqueness check.
 * NOTE(review): fragmentary listing -- tb assignment, the fast-path
 * body and the error return after get_port are missing from this view. */
512 static int tcp_v6_hash_connect(struct sock *sk)
514 struct tcp_bind_hashbucket *head;
515 struct tcp_bind_bucket *tb;
518 if (inet_sk(sk)->num == 0) {
519 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
522 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
525 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
528 spin_lock_bh(&head->lock);
530 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
532 spin_unlock_bh(&head->lock);
535 spin_unlock_bh(&head->lock);
536 return tcp_v6_check_established(sk);
/* Return the inbound interface index recorded in the skb's IPv6
 * control block. */
540 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
542 return IP6CB(skb)->iif;
/* Active connect for an IPv6 TCP socket.  Visible stages:
 *  - validate address length/family; resolve a requested flow label via
 *    fl6_sock_lookup() and honour IPV6_FLOWINFO_MASK;
 *  - map in6addr_any to loopback (BSD'ism noted inline);
 *  - for link-local destinations require/validate a bound interface
 *    against sin6_scope_id;
 *  - reset cached timestamps when reconnecting to a different peer;
 *  - for v4-mapped destinations, switch af_specific/backlog_rcv to the
 *    IPv4 handlers and delegate to tcp_v4_connect(), restoring the IPv6
 *    operations on failure;
 *  - otherwise build a flow (flowi), honour a routing-header first hop
 *    from np->opt->srcrt, route via ip6_dst_lookup(), store the dst,
 *    derive mss_clamp/ext_header_len, pick the port with
 *    tcp_v6_hash_connect(), choose write_seq and call tcp_connect().
 * Error path (visible at the tail) resets state to TCP_CLOSE and clears
 * sk_route_caps.
 * NOTE(review): fragmentary listing -- many returns, labels and braces
 * (e.g. the saddr-NULL handling around line 679) are missing from this
 * view, so the precise error unwinding cannot be confirmed here. */
545 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
548 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
549 struct inet_opt *inet = inet_sk(sk);
550 struct ipv6_pinfo *np = inet6_sk(sk);
551 struct tcp_opt *tp = tcp_sk(sk);
552 struct in6_addr *saddr = NULL;
554 struct dst_entry *dst;
558 if (addr_len < SIN6_LEN_RFC2133)
561 if (usin->sin6_family != AF_INET6)
562 return(-EAFNOSUPPORT);
564 memset(&fl, 0, sizeof(fl));
567 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
568 IP6_ECN_flow_init(fl.fl6_flowlabel);
569 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
570 struct ip6_flowlabel *flowlabel;
571 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
572 if (flowlabel == NULL)
574 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
575 fl6_sock_release(flowlabel);
580 * connect() to INADDR_ANY means loopback (BSD'ism).
583 if(ipv6_addr_any(&usin->sin6_addr))
584 usin->sin6_addr.s6_addr[15] = 0x1;
586 addr_type = ipv6_addr_type(&usin->sin6_addr);
588 if(addr_type & IPV6_ADDR_MULTICAST)
591 if (addr_type&IPV6_ADDR_LINKLOCAL) {
592 if (addr_len >= sizeof(struct sockaddr_in6) &&
593 usin->sin6_scope_id) {
594 /* If interface is set while binding, indices
597 if (sk->sk_bound_dev_if &&
598 sk->sk_bound_dev_if != usin->sin6_scope_id)
601 sk->sk_bound_dev_if = usin->sin6_scope_id;
604 /* Connect to link-local address requires an interface */
605 if (!sk->sk_bound_dev_if)
609 if (tp->ts_recent_stamp &&
610 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
612 tp->ts_recent_stamp = 0;
616 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
617 np->flow_label = fl.fl6_flowlabel;
623 if (addr_type == IPV6_ADDR_MAPPED) {
624 u32 exthdrlen = tp->ext_header_len;
625 struct sockaddr_in sin;
627 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
629 if (__ipv6_only_sock(sk))
632 sin.sin_family = AF_INET;
633 sin.sin_port = usin->sin6_port;
634 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
636 tp->af_specific = &ipv6_mapped;
637 sk->sk_backlog_rcv = tcp_v4_do_rcv;
639 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
642 tp->ext_header_len = exthdrlen;
643 tp->af_specific = &ipv6_specific;
644 sk->sk_backlog_rcv = tcp_v6_do_rcv;
647 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
649 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
656 if (!ipv6_addr_any(&np->rcv_saddr))
657 saddr = &np->rcv_saddr;
659 fl.proto = IPPROTO_TCP;
660 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
661 ipv6_addr_copy(&fl.fl6_src,
662 (saddr ? saddr : &np->saddr));
663 fl.oif = sk->sk_bound_dev_if;
664 fl.fl_ip_dport = usin->sin6_port;
665 fl.fl_ip_sport = inet->sport;
667 if (np->opt && np->opt->srcrt) {
668 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
669 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
672 err = ip6_dst_lookup(sk, &dst, &fl);
679 ipv6_addr_copy(&np->rcv_saddr, saddr);
682 /* set the source address */
683 ipv6_addr_copy(&np->saddr, saddr);
684 inet->rcv_saddr = LOOPBACK4_IPV6;
686 ip6_dst_store(sk, dst, NULL);
687 sk->sk_route_caps = dst->dev->features &
688 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
690 tp->ext_header_len = 0;
692 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
693 tp->ext2_header_len = dst->header_len;
695 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
697 inet->dport = usin->sin6_port;
699 tcp_set_state(sk, TCP_SYN_SENT);
700 err = tcp_v6_hash_connect(sk);
705 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
710 err = tcp_connect(sk);
717 tcp_set_state(sk, TCP_CLOSE);
721 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.  Looks up the owning socket from the
 * offending packet's embedded TCP header, ignores errors for TIME_WAIT
 * and CLOSE sockets, and validates the echoed sequence number against
 * [snd_una, snd_nxt].  ICMPV6_PKT_TOOBIG triggers an MSS re-sync and
 * simple retransmit when the socket is not user-locked.  For SYN_SENT /
 * SYN_RECV-era errors it also consults the listener's open_request
 * table (tcp_v6_search_req) and drops the matching request.  Finally
 * the converted errno is reported via sk_err / sk_err_soft depending on
 * lock state and np->recverr.
 * NOTE(review): fragmentary listing -- case labels, bh_lock/unlock, the
 * np/tp assignments and several gotos are missing from this view. */
725 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
726 int type, int code, int offset, __u32 info)
728 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
729 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
730 struct ipv6_pinfo *np;
736 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
739 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
743 if (sk->sk_state == TCP_TIME_WAIT) {
744 tcp_tw_put((struct tcp_tw_bucket*)sk);
749 if (sock_owned_by_user(sk))
750 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
752 if (sk->sk_state == TCP_CLOSE)
756 seq = ntohl(th->seq);
757 if (sk->sk_state != TCP_LISTEN &&
758 !between(seq, tp->snd_una, tp->snd_nxt)) {
759 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
765 if (type == ICMPV6_PKT_TOOBIG) {
766 struct dst_entry *dst = NULL;
768 if (sock_owned_by_user(sk))
770 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
773 /* icmp should have updated the destination cache entry */
774 dst = __sk_dst_check(sk, np->dst_cookie);
777 struct inet_opt *inet = inet_sk(sk);
780 /* BUGGG_FUTURE: Again, it is not clear how
781 to handle rthdr case. Ignore this complexity
784 memset(&fl, 0, sizeof(fl));
785 fl.proto = IPPROTO_TCP;
786 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
787 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
788 fl.oif = sk->sk_bound_dev_if;
789 fl.fl_ip_dport = inet->dport;
790 fl.fl_ip_sport = inet->sport;
792 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
793 sk->sk_err_soft = -err;
799 if (tp->pmtu_cookie > dst_pmtu(dst)) {
800 tcp_sync_mss(sk, dst_pmtu(dst));
801 tcp_simple_retransmit(sk);
802 } /* else let the usual retransmit timer handle it */
807 icmpv6_err_convert(type, code, &err);
809 /* Might be for an open_request */
810 switch (sk->sk_state) {
811 struct open_request *req, **prev;
813 if (sock_owned_by_user(sk))
816 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
817 &hdr->saddr, tcp_v6_iif(skb));
821 /* ICMPs are not backlogged, hence we cannot get
822 * an established socket here.
824 BUG_TRAP(req->sk == NULL);
826 if (seq != req->snt_isn) {
827 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
831 tcp_synq_drop(sk, req, prev);
835 case TCP_SYN_RECV: /* Cannot happen.
836 It can, it SYNs are crossed. --ANK */
837 if (!sock_owned_by_user(sk)) {
838 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
840 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
844 sk->sk_err_soft = err;
848 if (!sock_owned_by_user(sk) && np->recverr) {
850 sk->sk_error_report(sk);
852 sk->sk_err_soft = err;
/* Build and transmit a SYN|ACK for the pending open_request 'req'.
 * Constructs the flow from the request's addresses/ports, optionally
 * inverts a received routing header (ipv6_invert_rthdr) when the
 * listener asked for it, routes via ip6_dst_lookup(), checksums the
 * tcp_make_synack() skb with tcp_v6_check(), and sends with ip6_xmit();
 * NET_XMIT_CN is visibly treated specially.  A temporary 'opt' not
 * belonging to the socket is freed at the end.
 * NOTE(review): fragmentary listing -- 'done' label, error gotos and
 * the np->opt fallback assignment are missing from this view. */
860 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
861 struct dst_entry *dst)
863 struct ipv6_pinfo *np = inet6_sk(sk);
864 struct sk_buff * skb;
865 struct ipv6_txoptions *opt = NULL;
869 memset(&fl, 0, sizeof(fl));
870 fl.proto = IPPROTO_TCP;
871 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
872 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
873 fl.fl6_flowlabel = 0;
874 fl.oif = req->af.v6_req.iif;
875 fl.fl_ip_dport = req->rmt_port;
876 fl.fl_ip_sport = inet_sk(sk)->sport;
881 np->rxopt.bits.srcrt == 2 &&
882 req->af.v6_req.pktopts) {
883 struct sk_buff *pktopts = req->af.v6_req.pktopts;
884 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
886 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
889 if (opt && opt->srcrt) {
890 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
891 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
894 err = ip6_dst_lookup(sk, &dst, &fl);
899 skb = tcp_make_synack(sk, dst, req);
901 struct tcphdr *th = skb->h.th;
903 th->check = tcp_v6_check(th, skb->len,
904 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
905 csum_partial((char *)th, skb->len, skb->csum));
907 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
908 err = ip6_xmit(sk, skb, &fl, opt, 0);
909 if (err == NET_XMIT_CN)
915 if (opt && opt != np->opt)
916 sock_kfree_s(sk, opt, opt->tot_len);
/* open_request destructor: drop the reference to any pktoptions skb
 * latched on the request.  (The kfree_skb NULL guard comes from the
 * 'if' -- kfree_skb of this era required a non-NULL skb.) */
920 static void tcp_v6_or_free(struct open_request *req)
922 if (req->af.v6_req.pktopts)
923 kfree_skb(req->af.v6_req.pktopts);
/* IPv6 open_request callback table: retransmit SYN|ACK, ACK, destroy
 * and RST handlers wired to the functions defined in this file.
 * NOTE(review): fragmentary listing; the .family initializer line is
 * missing from this view. */
926 static struct or_calltable or_ipv6 = {
928 .rtx_syn_ack = tcp_v6_send_synack,
929 .send_ack = tcp_v6_or_send_ack,
930 .destructor = tcp_v6_or_free,
931 .send_reset = tcp_v6_send_reset
/* Decide whether the received skb carries IPv6 options the socket asked
 * to see (hop-by-hop, flow info, routing header, destination options),
 * by intersecting the skb's parsed inet6_skb_parm with np->rxopt bits.
 * NOTE(review): fragmentary listing; the return statements are missing
 * from this view. */
934 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
936 struct ipv6_pinfo *np = inet6_sk(sk);
937 struct inet6_skb_parm *opt = IP6CB(skb);
940 if ((opt->hop && np->rxopt.bits.hopopts) ||
941 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
942 np->rxopt.bits.rxflow) ||
943 (opt->srcrt && np->rxopt.bits.srcrt) ||
944 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/* Fill in the outgoing TCP checksum.  With hardware checksum offload
 * (CHECKSUM_HW) only the pseudo-header sum is stored (inverted) and
 * skb->csum records the offset of the check field for the NIC; the
 * software path computes the full csum_ipv6_magic over the header.
 * NOTE(review): fragmentary listing; the skb parameter line and else
 * branch head are missing from this view. */
951 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
954 struct ipv6_pinfo *np = inet6_sk(sk);
956 if (skb->ip_summed == CHECKSUM_HW) {
957 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
958 skb->csum = offsetof(struct tcphdr, check);
960 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
961 csum_partial((char *)th, th->doff<<2,
/* Build and transmit a RST in response to 'skb' (unicast destinations
 * only).  Allocates a minimal skb, swaps ports, derives seq/ack_seq
 * from the offending segment (seq from its ack_seq; ack_seq counts
 * SYN/FIN flags plus payload), computes the pseudo-header checksum, and
 * sends with ip6_xmit() on a freshly looked-up route with sk == NULL.
 * NOTE(review): fragmentary listing -- the th->rst guard, the ack/no-ack
 * branch heads and the final kfree_skb are missing from this view. */
967 static void tcp_v6_send_reset(struct sk_buff *skb)
969 struct tcphdr *th = skb->h.th, *t1;
970 struct sk_buff *buff;
976 if (!ipv6_unicast_destination(skb))
980 * We need to grab some memory, and put together an RST,
981 * and then put it into the queue to be sent.
984 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
988 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
990 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
992 /* Swap the send and the receive. */
993 memset(t1, 0, sizeof(*t1));
994 t1->dest = th->source;
995 t1->source = th->dest;
996 t1->doff = sizeof(*t1)/4;
1000 t1->seq = th->ack_seq;
1003 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1004 + skb->len - (th->doff<<2));
1007 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1009 memset(&fl, 0, sizeof(fl));
1010 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1011 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1013 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1014 sizeof(*t1), IPPROTO_TCP,
1017 fl.proto = IPPROTO_TCP;
1018 fl.oif = tcp_v6_iif(skb);
1019 fl.fl_ip_dport = t1->dest;
1020 fl.fl_ip_sport = t1->source;
1022 /* sk = NULL, but it is safe for now. RST socket required. */
1023 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1024 ip6_xmit(NULL, buff, &fl, NULL, 0);
1025 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1026 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Send a bare ACK (no attached socket) echoing back to skb's sender,
 * with the given seq/ack/window; when 'ts' is nonzero a TCP timestamp
 * option (NOP,NOP,TIMESTAMP) is appended, enlarging tot_len.  Used by
 * the TIME_WAIT and open_request ACK paths below.
 * NOTE(review): fragmentary listing -- the ts-conditional tot_len
 * adjustment, the *ptr++ echo of 'ts', and the failure kfree_skb are
 * missing from this view. */
1033 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1035 struct tcphdr *th = skb->h.th, *t1;
1036 struct sk_buff *buff;
1038 int tot_len = sizeof(struct tcphdr);
1040 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1044 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1049 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1051 /* Swap the send and the receive. */
1052 memset(t1, 0, sizeof(*t1));
1053 t1->dest = th->source;
1054 t1->source = th->dest;
1055 t1->doff = tot_len/4;
1056 t1->seq = htonl(seq);
1057 t1->ack_seq = htonl(ack);
1059 t1->window = htons(win);
1062 u32 *ptr = (u32*)(t1 + 1);
1063 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1064 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1065 *ptr++ = htonl(tcp_time_stamp);
1069 buff->csum = csum_partial((char *)t1, tot_len, 0);
1071 memset(&fl, 0, sizeof(fl));
1072 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1073 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1075 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1076 tot_len, IPPROTO_TCP,
1079 fl.proto = IPPROTO_TCP;
1080 fl.oif = tcp_v6_iif(skb);
1081 fl.fl_ip_dport = t1->dest;
1082 fl.fl_ip_sport = t1->source;
1084 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1085 ip6_xmit(NULL, buff, &fl, NULL, 0);
1086 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME_WAIT socket, replaying its recorded
 * snd_nxt / rcv_nxt / scaled receive window / ts_recent. */
1093 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1095 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1097 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1098 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* ACK on behalf of a pending open_request: ISN+1 / rcv ISN+1 with the
 * request's advertised window and timestamp. */
1103 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1105 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* For a LISTEN socket, match an incoming segment against the pending
 * open_request table (handing it to tcp_check_req on a hit) or an
 * already-established/TIME_WAIT child found via
 * __tcp_v6_lookup_established().  The SYN-cookie branch is compiled
 * out (#if 0).
 * NOTE(review): fragmentary listing -- the TIME_WAIT handling tail and
 * the final return are missing from this view. */
1109 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1111 struct open_request *req, **prev;
1112 struct tcphdr *th = skb->h.th;
1113 struct tcp_opt *tp = tcp_sk(sk);
1116 /* Find possible connection requests. */
1117 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1118 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1120 return tcp_check_req(sk, skb, req, prev);
1122 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1124 &skb->nh.ipv6h->daddr,
1129 if (nsk->sk_state != TCP_TIME_WAIT) {
1133 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1137 #if 0 /*def CONFIG_SYN_COOKIES*/
1138 if (!th->rst && !th->syn && th->ack)
1139 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* Link 'req' into the listener's SYN table at the slot chosen by
 * tcp_v6_synq_hash(), with an expiry of now + TCP_TIMEOUT_INIT; the
 * table write is protected by tp->syn_wait_lock.  With
 * CONFIG_ACCEPT_QUEUES the per-class accounting hook is also called.
 * NOTE(review): fragmentary listing; the #else branch (if any) after
 * the #ifdef is missing from this view. */
1144 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1146 struct tcp_opt *tp = tcp_sk(sk);
1147 struct tcp_listen_opt *lopt = tp->listen_opt;
1148 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1151 req->expires = jiffies + TCP_TIMEOUT_INIT;
1153 req->dl_next = lopt->syn_table[h];
1155 write_lock(&tp->syn_wait_lock);
1156 lopt->syn_table[h] = req;
1157 write_unlock(&tp->syn_wait_lock);
1159 #ifdef CONFIG_ACCEPT_QUEUES
1160 tcp_synq_added(sk, req);
/* Handle an incoming SYN on a listening socket.  v4 packets are
 * delegated to tcp_v4_conn_request().  Visible logic: reject
 * non-unicast destinations; drop (with a ratelimited warning) when the
 * SYN queue is full and this is not a retransmit-driven entry (isn);
 * with CONFIG_ACCEPT_QUEUES, map skb->nfmark to an accept class and
 * require it to have shares; allocate and initialize an open_request
 * from the parsed TCP options, latch pktoptions when the listener wants
 * them, record the inbound interface for link-local peers, pick an ISN,
 * send the SYN|ACK and queue the request.  The 'drop' path frees the
 * request, bumps ATTEMPTFAILS and returns 0 (no RST, per the comment).
 * NOTE(review): fragmentary listing -- goto labels, several #else/#endif
 * lines and the accept-queue drop branches are missing from this view. */
1167 /* FIXME: this is substantially similar to the ipv4 code.
1168 * Can some kind of merge be done? -- erics
1170 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1172 struct ipv6_pinfo *np = inet6_sk(sk);
1173 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1174 struct open_request *req = NULL;
1175 __u32 isn = TCP_SKB_CB(skb)->when;
1176 #ifdef CONFIG_ACCEPT_QUEUES
1180 if (skb->protocol == htons(ETH_P_IP))
1181 return tcp_v4_conn_request(sk, skb);
1183 if (!ipv6_unicast_destination(skb))
1188 * There are no SYN attacks on IPv6, yet...
1190 if (tcp_synq_is_full(sk) && !isn) {
1191 if (net_ratelimit())
1192 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1196 #ifdef CONFIG_ACCEPT_QUEUES
1197 class = (skb->nfmark <= 0) ? 0 :
1198 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1200 * Accept only if the class has shares set or if the default class
1201 * i.e. class 0 has shares
1203 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1204 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1210 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1212 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1217 req = tcp_openreq_alloc();
1221 tcp_clear_options(&tmptp);
1222 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1223 tmptp.user_mss = tp->user_mss;
1225 tcp_parse_options(skb, &tmptp, 0);
1227 tmptp.tstamp_ok = tmptp.saw_tstamp;
1228 tcp_openreq_init(req, &tmptp, skb);
1229 #ifdef CONFIG_ACCEPT_QUEUES
1230 req->acceptq_class = class;
1231 req->acceptq_time_stamp = jiffies;
1233 req->class = &or_ipv6;
1234 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1235 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1236 TCP_ECN_create_request(req, skb->h.th);
1237 req->af.v6_req.pktopts = NULL;
1238 if (ipv6_opt_accepted(sk, skb) ||
1239 np->rxopt.bits.rxinfo ||
1240 np->rxopt.bits.rxhlim) {
1241 atomic_inc(&skb->users);
1242 req->af.v6_req.pktopts = skb;
1244 req->af.v6_req.iif = sk->sk_bound_dev_if;
1246 /* So that link locals have meaning */
1247 if (!sk->sk_bound_dev_if &&
1248 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1249 req->af.v6_req.iif = tcp_v6_iif(skb);
1252 isn = tcp_v6_init_sequence(sk,skb);
1256 if (tcp_v6_send_synack(sk, req, NULL))
1259 tcp_v6_synq_add(sk, req);
1265 tcp_openreq_free(req);
1267 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1268 return 0; /* don't send reset */
/* Create the child socket for a completed 3-way handshake.
 *
 * v4-mapped path (skb->protocol == ETH_P_IP): delegate to
 * tcp_v4_syn_recv_sock(), then graft IPv6 state onto the child --
 * pinet6, a copied ipv6_pinfo, ::ffff:a.b.c.d mapped daddr/saddr,
 * the ipv6_mapped af_specific ops and tcp_v4_do_rcv backlog handler --
 * and re-sync the MSS since the header sizes changed.
 *
 * Native path: enforce accept-queue limits, optionally invert a
 * received routing header, route the reply flow, clone the child with
 * tcp_create_openreq_child(), store the dst, copy addresses from the
 * request, clear inherited IPv4 options, clone latched pktoptions and
 * the listener's IPv6 txoptions (reattached to the child's optmem, per
 * the inline comment), derive ext_header_len, sync MSS/advmss, set the
 * LOOPBACK4_IPV6 placeholder IPv4 addresses, then hash the child and
 * inherit the bound port.  The tail handles overflow/drop accounting
 * and frees a non-inherited 'opt'.
 * NOTE(review): fragmentary listing -- returns, 'out'/overflow labels
 * and several braces are missing from this view; control flow between
 * the two paths cannot be fully confirmed here. */
1271 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1272 struct open_request *req,
1273 struct dst_entry *dst)
1275 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1276 struct tcp6_sock *newtcp6sk;
1277 struct inet_opt *newinet;
1278 struct tcp_opt *newtp;
1280 struct ipv6_txoptions *opt;
1282 if (skb->protocol == htons(ETH_P_IP)) {
1287 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1292 newtcp6sk = (struct tcp6_sock *)newsk;
1293 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1295 newinet = inet_sk(newsk);
1296 newnp = inet6_sk(newsk);
1297 newtp = tcp_sk(newsk);
1299 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1301 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1304 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1307 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1309 newtp->af_specific = &ipv6_mapped;
1310 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1311 newnp->pktoptions = NULL;
1313 newnp->mcast_oif = tcp_v6_iif(skb);
1314 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1316 /* Charge newly allocated IPv6 socket. Though it is mapped,
1319 #ifdef INET_REFCNT_DEBUG
1320 atomic_inc(&inet6_sock_nr);
1323 /* It is tricky place. Until this moment IPv4 tcp
1324 worked with IPv6 af_tcp.af_specific.
1327 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1334 #ifdef CONFIG_ACCEPT_QUEUES
1335 if (sk_acceptq_is_full(sk, req->acceptq_class))
1337 if (sk_acceptq_is_full(sk))
1341 if (np->rxopt.bits.srcrt == 2 &&
1342 opt == NULL && req->af.v6_req.pktopts) {
1343 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1345 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1351 memset(&fl, 0, sizeof(fl));
1352 fl.proto = IPPROTO_TCP;
1353 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1354 if (opt && opt->srcrt) {
1355 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1356 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1358 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1359 fl.oif = sk->sk_bound_dev_if;
1360 fl.fl_ip_dport = req->rmt_port;
1361 fl.fl_ip_sport = inet_sk(sk)->sport;
1363 if (ip6_dst_lookup(sk, &dst, &fl))
1367 newsk = tcp_create_openreq_child(sk, req, skb);
1371 /* Charge newly allocated IPv6 socket */
1372 #ifdef INET_REFCNT_DEBUG
1373 atomic_inc(&inet6_sock_nr);
1376 ip6_dst_store(newsk, dst, NULL);
1377 newsk->sk_route_caps = dst->dev->features &
1378 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1380 newtcp6sk = (struct tcp6_sock *)newsk;
1381 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1383 newtp = tcp_sk(newsk);
1384 newinet = inet_sk(newsk);
1385 newnp = inet6_sk(newsk);
1387 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1389 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1390 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1391 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1392 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1394 /* Now IPv6 options...
1396 First: no IPv4 options.
1398 newinet->opt = NULL;
1401 newnp->rxopt.all = np->rxopt.all;
1403 /* Clone pktoptions received with SYN */
1404 newnp->pktoptions = NULL;
1405 if (req->af.v6_req.pktopts) {
1406 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1408 kfree_skb(req->af.v6_req.pktopts);
1409 req->af.v6_req.pktopts = NULL;
1410 if (newnp->pktoptions)
1411 skb_set_owner_r(newnp->pktoptions, newsk);
1414 newnp->mcast_oif = tcp_v6_iif(skb);
1415 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1417 /* Clone native IPv6 options from listening socket (if any)
1419 Yes, keeping reference count would be much more clever,
1420 but we make one more one thing there: reattach optmem
1424 newnp->opt = ipv6_dup_options(newsk, opt);
1426 sock_kfree_s(sk, opt, opt->tot_len);
1429 newtp->ext_header_len = 0;
1431 newtp->ext_header_len = newnp->opt->opt_nflen +
1432 newnp->opt->opt_flen;
1433 newtp->ext2_header_len = dst->header_len;
1435 tcp_sync_mss(newsk, dst_pmtu(dst));
1436 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1437 tcp_initialize_rcv_mss(newsk);
1439 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1441 __tcp_v6_hash(newsk);
1442 tcp_inherit_port(sk, newsk);
1447 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1449 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1450 if (opt && opt != np->opt)
1451 sock_kfree_s(sk, opt, opt->tot_len);
/* Validate/prepare the receive checksum.  Hardware-verified skbs are
 * re-checked against the pseudo-header (logging on failure); short
 * packets (<= 76 bytes) are fully verified in software and marked
 * CHECKSUM_UNNECESSARY; longer ones just get the pseudo-header sum
 * seeded into skb->csum for later incremental verification.
 * NOTE(review): fragmentary listing; the return statements and some
 * branch heads are missing from this view. */
1456 static int tcp_v6_checksum_init(struct sk_buff *skb)
1458 if (skb->ip_summed == CHECKSUM_HW) {
1459 skb->ip_summed = CHECKSUM_UNNECESSARY;
1460 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1461 &skb->nh.ipv6h->daddr,skb->csum))
1463 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1465 if (skb->len <= 76) {
1466 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1467 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1469 skb->ip_summed = CHECKSUM_UNNECESSARY;
1471 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1472 &skb->nh.ipv6h->daddr,0);
1477 /* The socket must have it's spinlock held when we get
1480 * We have a potential double-lock case here, so even when
1481 * doing backlog processing we use the BH locking scheme.
1482 * This is because we cannot sleep with the original spinlock
/*
 * tcp_v6_do_rcv - per-socket receive handler for IPv6 TCP segments.
 *
 * Called with the socket spinlock held (directly or from backlog
 * processing).  Responsibilities visible in this excerpt:
 *  - divert v4-mapped traffic (ETH_P_IP) to tcp_v4_do_rcv();
 *  - apply the socket filter;
 *  - clone the skb into opt_skb so IPV6_PKTOPTIONS data can be latched
 *    after the segment is consumed;
 *  - ESTABLISHED fast path via tcp_rcv_established();
 *  - LISTEN: hand off to tcp_v6_hnd_req()/tcp_child_process();
 *  - everything else: tcp_rcv_state_process();
 *  - on failure, send a reset and bump TCP_MIB_INERRS.
 *
 * The ipv6_pktoptions tail stores the cloned skb into np->pktoptions
 * (via xchg) only when it was queued in-order on a non-passive socket
 * and the user actually asked for some rx option (rxinfo/rxhlim/...).
 *
 * NOTE(review): labels (ipv6_pktoptions, discard, reset), gotos and
 * several braces are missing from this excerpt; the leading integers
 * are extraction artifacts.  Do not restructure without the full file.
 */
1485 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1487 struct ipv6_pinfo *np = inet6_sk(sk);
1489 struct sk_buff *opt_skb = NULL;
1491 /* Imagine: socket is IPv6. IPv4 packet arrives,
1492 goes to IPv4 receive handler and backlogged.
1493 From backlog it always goes here. Kerboom...
1494 Fortunately, tcp_rcv_established and rcv_established
1495 handle them correctly, but it is not case with
1496 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* v4-mapped socket: the packet is IPv4, let the v4 handler own it. */
1499 if (skb->protocol == htons(ETH_P_IP))
1500 return tcp_v4_do_rcv(sk, skb);
1502 if (sk_filter(sk, skb, 0))
1506 * socket locking is here for SMP purposes as backlog rcv
1507 * is currently called with bh processing disabled.
1510 /* Do Stevens' IPV6_PKTOPTIONS.
1512 Yes, guys, it is the only place in our code, where we
1513 may make it not affecting IPv4.
1514 The rest of code is protocol independent,
1515 and I do not like idea to uglify IPv4.
1517 Actually, all the idea behind IPV6_PKTOPTIONS
1518 looks not very well thought. For now we latch
1519 options, received in the last packet, enqueued
1520 by tcp. Feel free to propose better solution.
/* Clone kept aside; consumed (or freed) in the pktoptions tail below. */
1524 opt_skb = skb_clone(skb, GFP_ATOMIC);
1526 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1527 TCP_CHECK_TIMER(sk);
1528 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1530 TCP_CHECK_TIMER(sk);
1532 goto ipv6_pktoptions;
/* Slow path: re-verify header length and checksum before processing. */
1536 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1539 if (sk->sk_state == TCP_LISTEN) {
1540 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1545 * Queue it on the new socket if the new socket is active,
1546 * otherwise we just shortcircuit this and continue with
/* A child socket was produced (3WHS completed); let it process skb. */
1550 if (tcp_child_process(sk, nsk, skb))
1553 __kfree_skb(opt_skb);
1558 TCP_CHECK_TIMER(sk);
1559 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1561 TCP_CHECK_TIMER(sk);
1563 goto ipv6_pktoptions;
/* Error path: reset the peer, drop the clone, count the bad segment. */
1567 tcp_v6_send_reset(skb);
1570 __kfree_skb(opt_skb);
1574 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1579 /* Do you ask, what is it?
1581 1. skb was enqueued by tcp.
1582 2. skb is added to tail of read queue, rather than out of order.
1583 3. socket is not in passive state.
1584 4. Finally, it really contains options, which user wants to receive.
1587 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1588 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1589 if (np->rxopt.bits.rxinfo)
1590 np->mcast_oif = tcp_v6_iif(opt_skb);
1591 if (np->rxopt.bits.rxhlim)
1592 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
/* Swap the new clone in; the previously latched skb is freed below. */
1593 if (ipv6_opt_accepted(sk, opt_skb)) {
1594 skb_set_owner_r(opt_skb, sk);
1595 opt_skb = xchg(&np->pktoptions, opt_skb);
1597 __kfree_skb(opt_skb);
1598 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - top-level IPv6 TCP input handler (registered in
 * tcpv6_protocol below).
 *
 * Validates the segment (pskb_may_pull for the header, doff sanity,
 * checksum init), fills in TCP_SKB_CB(), looks the socket up in the
 * established/listening hashes, runs XFRM policy and socket-filter
 * checks, then either processes the segment directly (socket unowned:
 * prequeue or tcp_v6_do_rcv) or appends it to the owner's backlog.
 *
 * The tail of the function handles TIME_WAIT sockets: re-validate,
 * run tcp_timewait_state_process(), and on TCP_TW_SYN try to revive
 * the connection against a current listener, descheduling the
 * tw bucket if one is found; TCP_TW_ACK answers with a timewait ACK.
 *
 * NOTE(review): the no-socket path (which sends a reset, lines
 * 1679-1683 here) plus various labels/braces/case labels are partially
 * elided in this excerpt; the leading integers are extraction
 * artifacts.
 */
1607 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1609 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are processed. */
1614 if (skb->pkt_type != PACKET_HOST)
1618 * Count it even if it's bad.
1620 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1622 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* doff below the minimum header size means a malformed segment. */
1627 if (th->doff < sizeof(struct tcphdr)/4)
1629 if (!pskb_may_pull(skb, th->doff*4))
1632 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1633 tcp_v6_checksum_init(skb) < 0))
/* Cache sequence/ack/flags in the control block for the state machine. */
1637 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1638 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1639 skb->len - th->doff*4);
1640 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1641 TCP_SKB_CB(skb)->when = 0;
1642 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1643 TCP_SKB_CB(skb)->sacked = 0;
1645 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1646 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1652 if (sk->sk_state == TCP_TIME_WAIT)
/* IPsec policy and socket filter both get a veto before delivery. */
1655 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1656 goto discard_and_relse;
1658 if (sk_filter(sk, skb, 0))
1659 goto discard_and_relse;
/* Unowned socket: process now (or prequeue); owned: defer to backlog. */
1665 if (!sock_owned_by_user(sk)) {
1666 if (!tcp_prequeue(sk, skb))
1667 ret = tcp_v6_do_rcv(sk, skb);
1669 sk_add_backlog(sk, skb);
1673 return ret ? -1 : 0;
/* No matching socket: still policy-check, then reset if well-formed. */
1676 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1679 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1681 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1683 tcp_v6_send_reset(skb);
/* TIME_WAIT handling starts here (do_time_wait label elided). */
1700 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1701 tcp_tw_put((struct tcp_tw_bucket *) sk);
1705 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1706 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1707 tcp_tw_put((struct tcp_tw_bucket *) sk);
1711 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1712 skb, th, skb->len)) {
/* TCP_TW_SYN: a new SYN may legitimately reuse this timewait pair if
 * a listener still exists — retire the tw bucket and retry delivery. */
1717 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1719 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1720 tcp_tw_put((struct tcp_tw_bucket *)sk);
1724 /* Fall through to ACK */
1727 tcp_v6_timewait_ack(sk, skb);
1731 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_rebuild_header - revalidate/rebuild the cached route for a
 * connected IPv6 TCP socket after the old dst became stale.
 *
 * Checks the cached dst against np->dst_cookie; if invalid, rebuilds a
 * flow descriptor from the socket's addresses/ports (honoring a type-0
 * routing header's first hop, if present in np->opt->srcrt), redoes the
 * route lookup, and on success stores the new dst, refreshes
 * sk_route_caps (masking out checksum/TSO offloads the v6 stack does
 * not use here) and the cached ext2_header_len.
 *
 * On lookup failure sk_route_caps is cleared (error propagation lines
 * are elided in this excerpt; leading integers are extraction
 * artifacts).
 */
1736 static int tcp_v6_rebuild_header(struct sock *sk)
1739 struct dst_entry *dst;
1740 struct ipv6_pinfo *np = inet6_sk(sk);
/* NULL here means the cached route is gone or the cookie went stale. */
1742 dst = __sk_dst_check(sk, np->dst_cookie);
1745 struct inet_opt *inet = inet_sk(sk);
1748 memset(&fl, 0, sizeof(fl));
1749 fl.proto = IPPROTO_TCP;
1750 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1751 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1752 fl.fl6_flowlabel = np->flow_label;
1753 fl.oif = sk->sk_bound_dev_if;
1754 fl.fl_ip_dport = inet->dport;
1755 fl.fl_ip_sport = inet->sport;
/* Source routing: route towards the first hop of the routing header. */
1757 if (np->opt && np->opt->srcrt) {
1758 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1759 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1762 err = ip6_dst_lookup(sk, &dst, &fl);
1765 sk->sk_route_caps = 0;
1769 ip6_dst_store(sk, dst, NULL);
1770 sk->sk_route_caps = dst->dev->features &
1771 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1772 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * tcp_v6_xmit - transmit one TCP segment over IPv6 (the .queue_xmit
 * hook of ipv6_specific).
 *
 * Builds the flow descriptor from socket state (addresses, ports,
 * flow label with ECN applied, bound device), routes via the cached
 * dst or a fresh ip6_dst_lookup() — again honoring a type-0 routing
 * header's first hop — attaches the dst to the skb, restores the real
 * final destination into the flow, and hands off to ip6_xmit() with
 * the socket's IPv6 options.
 *
 * On lookup failure sk_err_soft records the (negated) error; the
 * early-return lines are elided in this excerpt and the leading
 * integers are extraction artifacts.
 */
1778 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1780 struct sock *sk = skb->sk;
1781 struct inet_opt *inet = inet_sk(sk);
1782 struct ipv6_pinfo *np = inet6_sk(sk);
1784 struct dst_entry *dst;
1786 memset(&fl, 0, sizeof(fl));
1787 fl.proto = IPPROTO_TCP;
1788 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1789 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1790 fl.fl6_flowlabel = np->flow_label;
/* Fold the socket's ECN state into the outgoing flow label. */
1791 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1792 fl.oif = sk->sk_bound_dev_if;
1793 fl.fl_ip_sport = inet->sport;
1794 fl.fl_ip_dport = inet->dport;
/* Source routing: route towards the routing header's first hop. */
1796 if (np->opt && np->opt->srcrt) {
1797 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1798 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1801 dst = __sk_dst_check(sk, np->dst_cookie);
/* Cache miss: do a fresh route lookup and re-store it on the socket. */
1804 int err = ip6_dst_lookup(sk, &dst, &fl);
1807 sk->sk_err_soft = -err;
1811 ip6_dst_store(sk, dst, NULL);
1812 sk->sk_route_caps = dst->dev->features &
1813 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1814 tcp_sk(sk)->ext2_header_len = dst->header_len;
1817 skb->dst = dst_clone(dst);
1819 /* Restore final destination back after routing done */
1820 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1822 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * v6_addr2sockaddr - fill a sockaddr_in6 with this socket's peer
 * address (the .addr2sockaddr hook, used e.g. by getpeername).
 *
 * Flow info is deliberately zeroed — the received flow label is not
 * stored for TCP — and sin6_scope_id is set only for link-local peers
 * on a device-bound socket, per the sin6_scope_id convention.
 * (Leading integers on each line are extraction artifacts.)
 */
1825 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1827 struct ipv6_pinfo *np = inet6_sk(sk);
1828 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1830 sin6->sin6_family = AF_INET6;
1831 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
/* Port is kept in network byte order, as sockaddr_in6 expects. */
1832 sin6->sin6_port = inet_sk(sk)->dport;
1833 /* We do not store received flowlabel for TCP */
1834 sin6->sin6_flowinfo = 0;
1835 sin6->sin6_scope_id = 0;
1836 if (sk->sk_bound_dev_if &&
1837 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1838 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * tcp_v6_remember_stamp - .remember_stamp hook; timestamp caching is
 * not implemented for IPv6 (the body/return is elided in this excerpt).
 */
1841 static int tcp_v6_remember_stamp(struct sock *sk)
1843 /* Alas, not yet... */
/*
 * Operations table for native IPv6 TCP sockets: every address-family
 * dependent hook points at the v6 implementation defined in this file
 * (or the generic ipv6 set/getsockopt handlers).
 */
1847 static struct tcp_func ipv6_specific = {
1848 .queue_xmit = tcp_v6_xmit,
1849 .send_check = tcp_v6_send_check,
1850 .rebuild_header = tcp_v6_rebuild_header,
1851 .conn_request = tcp_v6_conn_request,
1852 .syn_recv_sock = tcp_v6_syn_recv_sock,
1853 .remember_stamp = tcp_v6_remember_stamp,
1854 .net_header_len = sizeof(struct ipv6hdr),
1856 .setsockopt = ipv6_setsockopt,
1857 .getsockopt = ipv6_getsockopt,
1858 .addr2sockaddr = v6_addr2sockaddr,
1859 .sockaddr_len = sizeof(struct sockaddr_in6)
1863 * TCP over IPv4 via INET6 API
/*
 * Operations table for v4-mapped sockets (TCP over IPv4 via the INET6
 * API): transmit/checksum/header hooks use the IPv4 implementations,
 * while connection setup and sockopt handling stay on the v6 side so
 * the socket keeps its AF_INET6 identity (sockaddr_in6 addresses).
 */
1866 static struct tcp_func ipv6_mapped = {
1867 .queue_xmit = ip_queue_xmit,
1868 .send_check = tcp_v4_send_check,
1869 .rebuild_header = tcp_v4_rebuild_header,
1870 .conn_request = tcp_v6_conn_request,
1871 .syn_recv_sock = tcp_v6_syn_recv_sock,
1872 .remember_stamp = tcp_v4_remember_stamp,
1873 .net_header_len = sizeof(struct iphdr),
1875 .setsockopt = ipv6_setsockopt,
1876 .getsockopt = ipv6_getsockopt,
1877 .addr2sockaddr = v6_addr2sockaddr,
1878 .sockaddr_len = sizeof(struct sockaddr_in6)
1883 /* NOTE: A lot of things set to zero explicitly by call to
1884 * sk_alloc() so need not be done here.
/*
 * tcp_v6_init_sock - .init hook of tcpv6_prot; initialize per-socket
 * TCP state for a fresh AF_INET6 stream socket.
 *
 * Sets up queues and timers, seeds RTO/mdev with TCP_TIMEOUT_INIT,
 * initializes slow-start/cwnd-clamp per the Stevens tcpca draft,
 * defaults the MSS cache to 536, wires af_specific to ipv6_specific
 * (switched to ipv6_mapped later if the socket connects to a v4-mapped
 * address), and applies the sysctl buffer defaults.
 * (Leading integers are extraction artifacts; some lines, including
 * the return, are elided in this excerpt.)
 */
1886 static int tcp_v6_init_sock(struct sock *sk)
1888 struct tcp_opt *tp = tcp_sk(sk);
1890 skb_queue_head_init(&tp->out_of_order_queue);
1891 tcp_init_xmit_timers(sk);
1892 tcp_prequeue_init(tp);
1894 tp->rto = TCP_TIMEOUT_INIT;
1895 tp->mdev = TCP_TIMEOUT_INIT;
1897 /* So many TCP implementations out there (incorrectly) count the
1898 * initial SYN frame in their delayed-ACK and congestion control
1899 * algorithms that we must have the following bandaid to talk
1900 * efficiently to them. -DaveM
1904 /* See draft-stevens-tcpca-spec-01 for discussion of the
1905 * initialization of these values.
1907 tp->snd_ssthresh = 0x7fffffff;
1908 tp->snd_cwnd_clamp = ~0;
/* Conservative default MSS until the route/peer tells us better. */
1909 tp->mss_cache = 536;
1911 tp->reordering = sysctl_tcp_reordering;
1913 sk->sk_state = TCP_CLOSE;
1915 tp->af_specific = &ipv6_specific;
1917 sk->sk_write_space = sk_stream_write_space;
1918 sk->sk_use_write_queue = 1;
/* sysctl_tcp_wmem/rmem[1] are the default (middle) buffer sizes. */
1920 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1921 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1923 atomic_inc(&tcp_sockets_allocated);
/*
 * tcp_v6_destroy_sock - .destroy hook; run the shared IPv4 TCP
 * teardown first, then release the inet6-specific socket state.
 */
1928 static int tcp_v6_destroy_sock(struct sock *sk)
1930 extern int tcp_v4_destroy_sock(struct sock *sk);
1932 tcp_v4_destroy_sock(sk);
1933 return inet6_destroy_sock(sk);
1936 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6 - emit one SYN_RECV open_request as a /proc/net/tcp6
 * row: hex-formatted v6 addresses/ports, state, expire timer (ttd),
 * retransmit count, owner uid; fields that don't apply to a request
 * (inode, option sizes) are printed as 0.
 * (Leading integers are extraction artifacts; the seq_printf call and
 * some arguments are partially elided in this excerpt.)
 */
1937 static void get_openreq6(struct seq_file *seq,
1938 struct sock *sk, struct open_request *req, int i, int uid)
1940 struct in6_addr *dest, *src;
/* Remaining lifetime of the request, in jiffies (may go negative). */
1941 int ttd = req->expires - jiffies;
1946 src = &req->af.v6_req.loc_addr;
1947 dest = &req->af.v6_req.rmt_addr;
1949 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1950 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1952 src->s6_addr32[0], src->s6_addr32[1],
1953 src->s6_addr32[2], src->s6_addr32[3],
1954 ntohs(inet_sk(sk)->sport),
1955 dest->s6_addr32[0], dest->s6_addr32[1],
1956 dest->s6_addr32[2], dest->s6_addr32[3],
1957 ntohs(req->rmt_port),
1959 0,0, /* could print option size, but that is af dependent. */
1960 1, /* timers active (only the expire timer) */
1961 jiffies_to_clock_t(ttd),
1964 0, /* non standard timer */
1965 0, /* open_requests have no inode */
/*
 * get_tcp6_sock - emit one full TCP socket as a /proc/net/tcp6 row.
 *
 * Picks whichever timer is pending (retransmit, zero-window probe, or
 * keepalive via sk_timer) for the tm->when column, then prints
 * addresses/ports, queue depths (write_seq-snd_una / rcv_nxt-copied_seq),
 * retransmits, uid, refcount, and congestion-control diagnostics
 * (rto, ato, quick/pingpong, cwnd, ssthresh with 0xFFFF+ shown as -1).
 * (Leading integers are extraction artifacts; a few lines, including
 * the timer_active selector and `dest` assignment, are elided.)
 */
1969 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1971 struct in6_addr *dest, *src;
1974 unsigned long timer_expires;
1975 struct inet_opt *inet = inet_sk(sp);
1976 struct tcp_opt *tp = tcp_sk(sp);
1977 struct ipv6_pinfo *np = inet6_sk(sp);
1980 src = &np->rcv_saddr;
1981 destp = ntohs(inet->dport);
1982 srcp = ntohs(inet->sport);
/* Priority order: retransmit timer, then probe0, then keepalive. */
1983 if (tp->pending == TCP_TIME_RETRANS) {
1985 timer_expires = tp->timeout;
1986 } else if (tp->pending == TCP_TIME_PROBE0) {
1988 timer_expires = tp->timeout;
1989 } else if (timer_pending(&sp->sk_timer)) {
1991 timer_expires = sp->sk_timer.expires;
1994 timer_expires = jiffies;
1998 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1999 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2001 src->s6_addr32[0], src->s6_addr32[1],
2002 src->s6_addr32[2], src->s6_addr32[3], srcp,
2003 dest->s6_addr32[0], dest->s6_addr32[1],
2004 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2006 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2008 jiffies_to_clock_t(timer_expires - jiffies),
2013 atomic_read(&sp->sk_refcnt), sp,
2014 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2015 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * get_timewait6_sock - emit one TIME_WAIT bucket as a /proc/net/tcp6
 * row.  Queue/uid/inode fields are printed as 0 (not meaningful for a
 * tw bucket); timer_active is shown as 3 with the remaining ttd.
 * (Leading integers are extraction artifacts.)
 */
2019 static void get_timewait6_sock(struct seq_file *seq,
2020 struct tcp_tw_bucket *tw, int i)
2022 struct in6_addr *dest, *src;
/* Remaining TIME_WAIT lifetime in jiffies (clamped elsewhere). */
2024 int ttd = tw->tw_ttd - jiffies;
2029 dest = &tw->tw_v6_daddr;
2030 src = &tw->tw_v6_rcv_saddr;
2031 destp = ntohs(tw->tw_dport);
2032 srcp = ntohs(tw->tw_sport);
2035 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2036 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2038 src->s6_addr32[0], src->s6_addr32[1],
2039 src->s6_addr32[2], src->s6_addr32[3], srcp,
2040 dest->s6_addr32[0], dest->s6_addr32[1],
2041 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2042 tw->tw_substate, 0, 0,
2043 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2044 atomic_read(&tw->tw_refcnt), tw);
2047 #ifdef CONFIG_PROC_FS
/*
 * tcp6_seq_show - seq_file .show callback for /proc/net/tcp6.
 * Prints the header row for SEQ_START_TOKEN, otherwise dispatches on
 * the iterator state to the matching row formatter above.
 */
2048 static int tcp6_seq_show(struct seq_file *seq, void *v)
2050 struct tcp_iter_state *st;
2052 if (v == SEQ_START_TOKEN) {
2057 "st tx_queue rx_queue tr tm->when retrnsmt"
2058 " uid timeout inode\n");
2063 switch (st->state) {
2064 case TCP_SEQ_STATE_LISTENING:
2065 case TCP_SEQ_STATE_ESTABLISHED:
2066 get_tcp6_sock(seq, v, st->num);
2068 case TCP_SEQ_STATE_OPENREQ:
2069 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2071 case TCP_SEQ_STATE_TIME_WAIT:
2072 get_timewait6_sock(seq, v, st->num);
/* seq_file plumbing for /proc/net/tcp6; the fops struct is filled in
 * by tcp_proc_register() at init time. */
2079 static struct file_operations tcp6_seq_fops;
2080 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2081 .owner = THIS_MODULE,
2084 .seq_show = tcp6_seq_show,
2085 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 seq_file entry; returns the register
 * status. */
2088 int __init tcp6_proc_init(void)
2090 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister the /proc/net/tcp6 entry on module teardown. */
2093 void tcp6_proc_exit(void)
2095 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * tcpv6_prot - the struct proto exposed to the AF_INET6 socket layer.
 * Most operations reuse the protocol-independent TCP implementations;
 * only connect/init/destroy/backlog_rcv/hash/get_port are v6-specific.
 * Memory-pressure knobs share the global TCP accounting with IPv4.
 */
2099 struct proto tcpv6_prot = {
2102 .connect = tcp_v6_connect,
2103 .disconnect = tcp_disconnect,
2104 .accept = tcp_accept,
2106 .init = tcp_v6_init_sock,
2107 .destroy = tcp_v6_destroy_sock,
2108 .shutdown = tcp_shutdown,
2109 .setsockopt = tcp_setsockopt,
2110 .getsockopt = tcp_getsockopt,
2111 .sendmsg = tcp_sendmsg,
2112 .recvmsg = tcp_recvmsg,
2113 .backlog_rcv = tcp_v6_do_rcv,
2114 .hash = tcp_v6_hash,
2115 .unhash = tcp_unhash,
2116 .get_port = tcp_v6_get_port,
2117 .enter_memory_pressure = tcp_enter_memory_pressure,
2118 .sockets_allocated = &tcp_sockets_allocated,
2119 .memory_allocated = &tcp_memory_allocated,
2120 .memory_pressure = &tcp_memory_pressure,
2121 .sysctl_mem = sysctl_tcp_mem,
2122 .sysctl_wmem = sysctl_tcp_wmem,
2123 .sysctl_rmem = sysctl_tcp_rmem,
2124 .max_header = MAX_TCP_HEADER,
/* L4 protocol hooks registered with the IPv6 stack: tcp_v6_rcv handles
 * input, tcp_v6_err handles ICMPv6 errors; NOPOLICY defers XFRM policy
 * checks to the handler itself, FINAL marks this a terminal protocol. */
2127 static struct inet6_protocol tcpv6_protocol = {
2128 .handler = tcp_v6_rcv,
2129 .err_handler = tcp_v6_err,
2130 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2133 extern struct proto_ops inet6_stream_ops;
/* SOCK_STREAM/IPPROTO_TCP socket-switch entry tying tcpv6_prot to the
 * generic inet6 stream ops; PERMANENT prevents unregistration. */
2135 static struct inet_protosw tcpv6_protosw = {
2136 .type = SOCK_STREAM,
2137 .protocol = IPPROTO_TCP,
2138 .prot = &tcpv6_prot,
2139 .ops = &inet6_stream_ops,
2142 .flags = INET_PROTOSW_PERMANENT,
/*
 * tcpv6_init - boot-time registration of TCP with the IPv6 stack:
 * install the L4 protocol handler (failure is only logged — TCP/IPv6
 * would be silently absent) and add the socket-switch entry.
 */
2145 void __init tcpv6_init(void)
2147 /* register inet6 protocol */
2148 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2149 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2150 inet6_register_protosw(&tcpv6_protosw);