3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <asm/uaccess.h>
60 #include <linux/proc_fs.h>
61 #include <linux/seq_file.h>
63 static void tcp_v6_send_reset(struct sk_buff *skb);
64 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
65 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
68 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
69 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
71 static struct tcp_func ipv6_mapped;
72 static struct tcp_func ipv6_specific;
74 /* I have no idea if this is a good hash for v6 or not. -DaveM */
75 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
76 struct in6_addr *faddr, u16 fport)
78 int hashent = (lport ^ fport);
80 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
81 hashent ^= hashent>>16;
82 hashent ^= hashent>>8;
83 return (hashent & (tcp_ehash_size - 1));
86 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
88 struct inet_opt *inet = inet_sk(sk);
89 struct ipv6_pinfo *np = inet6_sk(sk);
90 struct in6_addr *laddr = &np->rcv_saddr;
91 struct in6_addr *faddr = &np->daddr;
92 __u16 lport = inet->num;
93 __u16 fport = inet->dport;
94 return tcp_v6_hashfn(laddr, lport, faddr, fport);
97 static inline int tcp_v6_bind_conflict(struct sock *sk,
98 struct tcp_bind_bucket *tb)
101 struct hlist_node *node;
103 /* We must walk the whole port owner list in this case. -DaveM */
104 sk_for_each_bound(sk2, node, &tb->owners) {
106 (!sk->sk_bound_dev_if ||
107 !sk2->sk_bound_dev_if ||
108 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
109 (!sk->sk_reuse || !sk2->sk_reuse ||
110 sk2->sk_state == TCP_LISTEN) &&
111 ipv6_rcv_saddr_equal(sk, sk2))
118 /* Grrr, addr_type already calculated by caller, but I don't want
119 * to add some silly "cookie" argument to this method just for that.
120 * But it doesn't matter, the recalculation is in the rarest path
121 * this function ever takes.
123 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
125 struct tcp_bind_hashbucket *head;
126 struct tcp_bind_bucket *tb;
127 struct hlist_node *node;
132 int low = sysctl_local_port_range[0];
133 int high = sysctl_local_port_range[1];
134 int remaining = (high - low) + 1;
137 spin_lock(&tcp_portalloc_lock);
138 rover = tcp_port_rover;
140 if ((rover < low) || (rover > high))
142 head = &tcp_bhash[tcp_bhashfn(rover)];
143 spin_lock(&head->lock);
144 tb_for_each(tb, node, &head->chain)
145 if (tb->port == rover)
149 spin_unlock(&head->lock);
150 } while (--remaining > 0);
151 tcp_port_rover = rover;
152 spin_unlock(&tcp_portalloc_lock);
154 /* Exhausted local port range during search? */
159 /* OK, here is the one we will use. */
162 head = &tcp_bhash[tcp_bhashfn(snum)];
163 spin_lock(&head->lock);
164 tb_for_each(tb, node, &head->chain)
165 if (tb->port == snum)
171 if (tb && !hlist_empty(&tb->owners)) {
172 if (tb->fastreuse > 0 && sk->sk_reuse &&
173 sk->sk_state != TCP_LISTEN) {
177 if (tcp_v6_bind_conflict(sk, tb))
183 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
185 if (hlist_empty(&tb->owners)) {
186 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
190 } else if (tb->fastreuse &&
191 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
195 if (!tcp_sk(sk)->bind_hash)
196 tcp_bind_hash(sk, tb, snum);
197 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
201 spin_unlock(&head->lock);
207 static __inline__ void __tcp_v6_hash(struct sock *sk)
209 struct hlist_head *list;
212 BUG_TRAP(sk_unhashed(sk));
214 if (sk->sk_state == TCP_LISTEN) {
215 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
216 lock = &tcp_lhash_lock;
219 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
220 list = &tcp_ehash[sk->sk_hashent].chain;
221 lock = &tcp_ehash[sk->sk_hashent].lock;
225 __sk_add_node(sk, list);
226 sock_prot_inc_use(sk->sk_prot);
231 static void tcp_v6_hash(struct sock *sk)
233 if (sk->sk_state != TCP_CLOSE) {
234 struct tcp_opt *tp = tcp_sk(sk);
236 if (tp->af_specific == &ipv6_mapped) {
246 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
249 struct hlist_node *node;
250 struct sock *result = NULL;
254 read_lock(&tcp_lhash_lock);
255 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
256 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
257 struct ipv6_pinfo *np = inet6_sk(sk);
260 if (!ipv6_addr_any(&np->rcv_saddr)) {
261 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
265 if (sk->sk_bound_dev_if) {
266 if (sk->sk_bound_dev_if != dif)
274 if (score > hiscore) {
282 read_unlock(&tcp_lhash_lock);
286 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
287 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
289 * The sockhash lock must be held as a reader here.
292 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
293 struct in6_addr *daddr, u16 hnum,
296 struct tcp_ehash_bucket *head;
298 struct hlist_node *node;
299 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
302 /* Optimize here for direct hit, only listening connections can
303 * have wildcards anyways.
305 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
306 head = &tcp_ehash[hash];
307 read_lock(&head->lock);
308 sk_for_each(sk, node, &head->chain) {
309 /* For IPV6 do the cheaper port and family tests first. */
310 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
311 goto hit; /* You sunk my battleship! */
313 /* Must check for a TIME_WAIT'er before going to listener hash. */
314 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
315 /* FIXME: acme: check this... */
316 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
318 if(*((__u32 *)&(tw->tw_dport)) == ports &&
319 sk->sk_family == PF_INET6) {
320 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
321 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
322 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
326 read_unlock(&head->lock);
331 read_unlock(&head->lock);
336 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
337 struct in6_addr *daddr, u16 hnum,
342 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
347 return tcp_v6_lookup_listener(daddr, hnum, dif);
350 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 dport,
357 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 * Open request hash tables.
368 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
372 a = raddr->s6_addr32[0];
373 b = raddr->s6_addr32[1];
374 c = raddr->s6_addr32[2];
376 a += JHASH_GOLDEN_RATIO;
377 b += JHASH_GOLDEN_RATIO;
379 __jhash_mix(a, b, c);
381 a += raddr->s6_addr32[3];
383 __jhash_mix(a, b, c);
385 return c & (TCP_SYNQ_HSIZE - 1);
388 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
389 struct open_request ***prevp,
391 struct in6_addr *raddr,
392 struct in6_addr *laddr,
395 struct tcp_listen_opt *lopt = tp->listen_opt;
396 struct open_request *req, **prev;
398 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
399 (req = *prev) != NULL;
400 prev = &req->dl_next) {
401 if (req->rmt_port == rport &&
402 req->class->family == AF_INET6 &&
403 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
404 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
405 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
406 BUG_TRAP(req->sk == NULL);
415 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
416 struct in6_addr *saddr,
417 struct in6_addr *daddr,
420 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
423 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
425 if (skb->protocol == htons(ETH_P_IPV6)) {
426 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
427 skb->nh.ipv6h->saddr.s6_addr32,
431 return secure_tcp_sequence_number(skb->nh.iph->daddr,
438 static int tcp_v6_check_established(struct sock *sk)
440 struct inet_opt *inet = inet_sk(sk);
441 struct ipv6_pinfo *np = inet6_sk(sk);
442 struct in6_addr *daddr = &np->rcv_saddr;
443 struct in6_addr *saddr = &np->daddr;
444 int dif = sk->sk_bound_dev_if;
445 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
446 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
447 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
449 struct hlist_node *node;
450 struct tcp_tw_bucket *tw;
452 write_lock_bh(&head->lock);
454 /* Check TIME-WAIT sockets first. */
455 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
456 tw = (struct tcp_tw_bucket*)sk2;
458 if(*((__u32 *)&(tw->tw_dport)) == ports &&
459 sk2->sk_family == PF_INET6 &&
460 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
461 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
462 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
463 struct tcp_opt *tp = tcp_sk(sk);
465 if (tw->tw_ts_recent_stamp) {
466 /* See comment in tcp_ipv4.c */
467 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
470 tp->ts_recent = tw->tw_ts_recent;
471 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
480 /* And established part... */
481 sk_for_each(sk2, node, &head->chain) {
482 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
487 BUG_TRAP(sk_unhashed(sk));
488 __sk_add_node(sk, &head->chain);
489 sk->sk_hashent = hash;
490 sock_prot_inc_use(sk->sk_prot);
491 write_unlock_bh(&head->lock);
494 /* Silly. Should hash-dance instead... */
496 tcp_tw_deschedule(tw);
497 NET_INC_STATS_BH(TimeWaitRecycled);
505 write_unlock_bh(&head->lock);
506 return -EADDRNOTAVAIL;
509 static int tcp_v6_hash_connect(struct sock *sk)
511 struct tcp_bind_hashbucket *head;
512 struct tcp_bind_bucket *tb;
515 if (inet_sk(sk)->num == 0) {
516 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
519 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
522 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
525 spin_lock_bh(&head->lock);
527 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
529 spin_unlock_bh(&head->lock);
532 spin_unlock_bh(&head->lock);
533 return tcp_v6_check_established(sk);
537 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
539 struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
543 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
546 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
547 struct inet_opt *inet = inet_sk(sk);
548 struct ipv6_pinfo *np = inet6_sk(sk);
549 struct tcp_opt *tp = tcp_sk(sk);
550 struct in6_addr *saddr = NULL;
552 struct dst_entry *dst;
556 if (addr_len < SIN6_LEN_RFC2133)
559 if (usin->sin6_family != AF_INET6)
560 return(-EAFNOSUPPORT);
562 memset(&fl, 0, sizeof(fl));
565 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
566 IP6_ECN_flow_init(fl.fl6_flowlabel);
567 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
568 struct ip6_flowlabel *flowlabel;
569 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
570 if (flowlabel == NULL)
572 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
573 fl6_sock_release(flowlabel);
578 * connect() to INADDR_ANY means loopback (BSD'ism).
581 if(ipv6_addr_any(&usin->sin6_addr))
582 usin->sin6_addr.s6_addr[15] = 0x1;
584 addr_type = ipv6_addr_type(&usin->sin6_addr);
586 if(addr_type & IPV6_ADDR_MULTICAST)
589 if (addr_type&IPV6_ADDR_LINKLOCAL) {
590 if (addr_len >= sizeof(struct sockaddr_in6) &&
591 usin->sin6_scope_id) {
592 /* If interface is set while binding, indices
595 if (sk->sk_bound_dev_if &&
596 sk->sk_bound_dev_if != usin->sin6_scope_id)
599 sk->sk_bound_dev_if = usin->sin6_scope_id;
602 /* Connect to link-local address requires an interface */
603 if (!sk->sk_bound_dev_if)
607 if (tp->ts_recent_stamp &&
608 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
610 tp->ts_recent_stamp = 0;
614 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
615 np->flow_label = fl.fl6_flowlabel;
621 if (addr_type == IPV6_ADDR_MAPPED) {
622 u32 exthdrlen = tp->ext_header_len;
623 struct sockaddr_in sin;
625 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
627 if (__ipv6_only_sock(sk))
630 sin.sin_family = AF_INET;
631 sin.sin_port = usin->sin6_port;
632 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
634 tp->af_specific = &ipv6_mapped;
635 sk->sk_backlog_rcv = tcp_v4_do_rcv;
637 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
640 tp->ext_header_len = exthdrlen;
641 tp->af_specific = &ipv6_specific;
642 sk->sk_backlog_rcv = tcp_v6_do_rcv;
645 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
647 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
654 if (!ipv6_addr_any(&np->rcv_saddr))
655 saddr = &np->rcv_saddr;
657 fl.proto = IPPROTO_TCP;
658 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
659 ipv6_addr_copy(&fl.fl6_src,
660 (saddr ? saddr : &np->saddr));
661 fl.oif = sk->sk_bound_dev_if;
662 fl.fl_ip_dport = usin->sin6_port;
663 fl.fl_ip_sport = inet->sport;
665 if (np->opt && np->opt->srcrt) {
666 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
667 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
670 err = ip6_dst_lookup(sk, &dst, &fl);
677 ipv6_addr_copy(&np->rcv_saddr, saddr);
680 /* set the source address */
681 ipv6_addr_copy(&np->saddr, saddr);
682 inet->rcv_saddr = LOOPBACK4_IPV6;
684 ip6_dst_store(sk, dst, NULL);
685 sk->sk_route_caps = dst->dev->features &
686 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
688 tp->ext_header_len = 0;
690 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
691 tp->ext2_header_len = dst->header_len;
693 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
695 inet->dport = usin->sin6_port;
697 tcp_set_state(sk, TCP_SYN_SENT);
698 err = tcp_v6_hash_connect(sk);
703 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
708 err = tcp_connect(sk);
715 tcp_set_state(sk, TCP_CLOSE);
719 sk->sk_route_caps = 0;
723 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
724 int type, int code, int offset, __u32 info)
726 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
727 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
728 struct ipv6_pinfo *np;
734 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
737 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), Icmp6InErrors);
741 if (sk->sk_state == TCP_TIME_WAIT) {
742 tcp_tw_put((struct tcp_tw_bucket*)sk);
747 if (sock_owned_by_user(sk))
748 NET_INC_STATS_BH(LockDroppedIcmps);
750 if (sk->sk_state == TCP_CLOSE)
754 seq = ntohl(th->seq);
755 if (sk->sk_state != TCP_LISTEN &&
756 !between(seq, tp->snd_una, tp->snd_nxt)) {
757 NET_INC_STATS_BH(OutOfWindowIcmps);
763 if (type == ICMPV6_PKT_TOOBIG) {
764 struct dst_entry *dst = NULL;
766 if (sock_owned_by_user(sk))
768 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
771 /* icmp should have updated the destination cache entry */
772 dst = __sk_dst_check(sk, np->dst_cookie);
775 struct inet_opt *inet = inet_sk(sk);
778 /* BUGGG_FUTURE: Again, it is not clear how
779 to handle rthdr case. Ignore this complexity
782 memset(&fl, 0, sizeof(fl));
783 fl.proto = IPPROTO_TCP;
784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
785 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
786 fl.oif = sk->sk_bound_dev_if;
787 fl.fl_ip_dport = inet->dport;
788 fl.fl_ip_sport = inet->sport;
790 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
791 sk->sk_err_soft = -err;
797 if (tp->pmtu_cookie > dst_pmtu(dst)) {
798 tcp_sync_mss(sk, dst_pmtu(dst));
799 tcp_simple_retransmit(sk);
800 } /* else let the usual retransmit timer handle it */
805 icmpv6_err_convert(type, code, &err);
807 /* Might be for an open_request */
808 switch (sk->sk_state) {
809 struct open_request *req, **prev;
811 if (sock_owned_by_user(sk))
814 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
815 &hdr->saddr, tcp_v6_iif(skb));
819 /* ICMPs are not backlogged, hence we cannot get
820 * an established socket here.
822 BUG_TRAP(req->sk == NULL);
824 if (seq != req->snt_isn) {
825 NET_INC_STATS_BH(OutOfWindowIcmps);
829 tcp_synq_drop(sk, req, prev);
833 case TCP_SYN_RECV: /* Cannot happen.
834 It can, it SYNs are crossed. --ANK */
835 if (!sock_owned_by_user(sk)) {
836 TCP_INC_STATS_BH(TcpAttemptFails);
838 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
842 sk->sk_err_soft = err;
846 if (!sock_owned_by_user(sk) && np->recverr) {
848 sk->sk_error_report(sk);
850 sk->sk_err_soft = err;
858 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
859 struct dst_entry *dst)
861 struct ipv6_pinfo *np = inet6_sk(sk);
862 struct sk_buff * skb;
863 struct ipv6_txoptions *opt = NULL;
867 memset(&fl, 0, sizeof(fl));
868 fl.proto = IPPROTO_TCP;
869 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
870 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
871 fl.fl6_flowlabel = 0;
872 fl.oif = req->af.v6_req.iif;
873 fl.fl_ip_dport = req->rmt_port;
874 fl.fl_ip_sport = inet_sk(sk)->sport;
879 np->rxopt.bits.srcrt == 2 &&
880 req->af.v6_req.pktopts) {
881 struct sk_buff *pktopts = req->af.v6_req.pktopts;
882 struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb;
884 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
887 if (opt && opt->srcrt) {
888 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
889 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
892 err = ip6_dst_lookup(sk, &dst, &fl);
897 skb = tcp_make_synack(sk, dst, req);
899 struct tcphdr *th = skb->h.th;
901 th->check = tcp_v6_check(th, skb->len,
902 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
903 csum_partial((char *)th, skb->len, skb->csum));
905 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
906 err = ip6_xmit(sk, skb, &fl, opt, 0);
907 if (err == NET_XMIT_CN)
913 if (opt && opt != np->opt)
914 sock_kfree_s(sk, opt, opt->tot_len);
918 static void tcp_v6_or_free(struct open_request *req)
920 if (req->af.v6_req.pktopts)
921 kfree_skb(req->af.v6_req.pktopts);
924 static struct or_calltable or_ipv6 = {
926 .rtx_syn_ack = tcp_v6_send_synack,
927 .send_ack = tcp_v6_or_send_ack,
928 .destructor = tcp_v6_or_free,
929 .send_reset = tcp_v6_send_reset
932 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
934 struct ipv6_pinfo *np = inet6_sk(sk);
935 struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
938 if ((opt->hop && np->rxopt.bits.hopopts) ||
939 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
940 np->rxopt.bits.rxflow) ||
941 (opt->srcrt && np->rxopt.bits.srcrt) ||
942 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
949 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
952 struct ipv6_pinfo *np = inet6_sk(sk);
954 if (skb->ip_summed == CHECKSUM_HW) {
955 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
956 skb->csum = offsetof(struct tcphdr, check);
958 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
959 csum_partial((char *)th, th->doff<<2,
965 static void tcp_v6_send_reset(struct sk_buff *skb)
967 struct tcphdr *th = skb->h.th, *t1;
968 struct sk_buff *buff;
974 if (!ipv6_unicast_destination(skb))
978 * We need to grab some memory, and put together an RST,
979 * and then put it into the queue to be sent.
982 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
986 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
988 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
990 /* Swap the send and the receive. */
991 memset(t1, 0, sizeof(*t1));
992 t1->dest = th->source;
993 t1->source = th->dest;
994 t1->doff = sizeof(*t1)/4;
998 t1->seq = th->ack_seq;
1001 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1002 + skb->len - (th->doff<<2));
1005 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1007 memset(&fl, 0, sizeof(fl));
1008 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1009 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1011 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1012 sizeof(*t1), IPPROTO_TCP,
1015 fl.proto = IPPROTO_TCP;
1016 fl.oif = tcp_v6_iif(skb);
1017 fl.fl_ip_dport = t1->dest;
1018 fl.fl_ip_sport = t1->source;
1020 /* sk = NULL, but it is safe for now. RST socket required. */
1021 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1022 ip6_xmit(NULL, buff, &fl, NULL, 0);
1023 TCP_INC_STATS_BH(TcpOutSegs);
1024 TCP_INC_STATS_BH(TcpOutRsts);
1031 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1033 struct tcphdr *th = skb->h.th, *t1;
1034 struct sk_buff *buff;
1036 int tot_len = sizeof(struct tcphdr);
1038 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1042 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1047 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1049 /* Swap the send and the receive. */
1050 memset(t1, 0, sizeof(*t1));
1051 t1->dest = th->source;
1052 t1->source = th->dest;
1053 t1->doff = tot_len/4;
1054 t1->seq = htonl(seq);
1055 t1->ack_seq = htonl(ack);
1057 t1->window = htons(win);
1060 u32 *ptr = (u32*)(t1 + 1);
1061 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1062 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1063 *ptr++ = htonl(tcp_time_stamp);
1067 buff->csum = csum_partial((char *)t1, tot_len, 0);
1069 memset(&fl, 0, sizeof(fl));
1070 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1071 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1073 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1074 tot_len, IPPROTO_TCP,
1077 fl.proto = IPPROTO_TCP;
1078 fl.oif = tcp_v6_iif(skb);
1079 fl.fl_ip_dport = t1->dest;
1080 fl.fl_ip_sport = t1->source;
1082 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1083 ip6_xmit(NULL, buff, &fl, NULL, 0);
1084 TCP_INC_STATS_BH(TcpOutSegs);
1091 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1093 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1095 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1096 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1101 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1103 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
1107 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1109 struct open_request *req, **prev;
1110 struct tcphdr *th = skb->h.th;
1111 struct tcp_opt *tp = tcp_sk(sk);
1114 /* Find possible connection requests. */
1115 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1116 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1118 return tcp_check_req(sk, skb, req, prev);
1120 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1122 &skb->nh.ipv6h->daddr,
1127 if (nsk->sk_state != TCP_TIME_WAIT) {
1131 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1135 #if 0 /*def CONFIG_SYN_COOKIES*/
1136 if (!th->rst && !th->syn && th->ack)
1137 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1142 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1144 struct tcp_opt *tp = tcp_sk(sk);
1145 struct tcp_listen_opt *lopt = tp->listen_opt;
1146 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1149 req->expires = jiffies + TCP_TIMEOUT_INIT;
1151 req->dl_next = lopt->syn_table[h];
1153 write_lock(&tp->syn_wait_lock);
1154 lopt->syn_table[h] = req;
1155 write_unlock(&tp->syn_wait_lock);
1157 #ifdef CONFIG_ACCEPT_QUEUES
1158 tcp_synq_added(sk, req);
1165 /* FIXME: this is substantially similar to the ipv4 code.
1166 * Can some kind of merge be done? -- erics
1168 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1170 struct ipv6_pinfo *np = inet6_sk(sk);
1171 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1172 struct open_request *req = NULL;
1173 __u32 isn = TCP_SKB_CB(skb)->when;
1174 #ifdef CONFIG_ACCEPT_QUEUES
1178 if (skb->protocol == htons(ETH_P_IP))
1179 return tcp_v4_conn_request(sk, skb);
1181 if (!ipv6_unicast_destination(skb))
1186 * There are no SYN attacks on IPv6, yet...
1188 if (tcp_synq_is_full(sk) && !isn) {
1189 if (net_ratelimit())
1190 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1194 #ifdef CONFIG_ACCEPT_QUEUES
1195 class = (skb->nfmark <= 0) ? 0 :
1196 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1198 * Accept only if the class has shares set or if the default class
1199 * i.e. class 0 has shares
1201 if (!(tcp_sk(sk)->acceptq[class].aq_valid)) {
1202 if (tcp_sk(sk)->acceptq[0].aq_valid)
1209 /* Accept backlog is full. If we have already queued enough
1210 * of warm entries in syn queue, drop request. It is better than
1211 * clogging syn queue with openreqs with exponentially increasing
1214 #ifdef CONFIG_ACCEPT_QUEUES
1215 if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1217 if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1222 req = tcp_openreq_alloc();
1226 tcp_clear_options(&tmptp);
1227 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1228 tmptp.user_mss = tp->user_mss;
1230 tcp_parse_options(skb, &tmptp, 0);
1232 tmptp.tstamp_ok = tmptp.saw_tstamp;
1233 tcp_openreq_init(req, &tmptp, skb);
1234 #ifdef CONFIG_ACCEPT_QUEUES
1235 req->acceptq_class = class;
1236 req->acceptq_time_stamp = jiffies;
1238 req->class = &or_ipv6;
1239 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1240 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1241 TCP_ECN_create_request(req, skb->h.th);
1242 req->af.v6_req.pktopts = NULL;
1243 if (ipv6_opt_accepted(sk, skb) ||
1244 np->rxopt.bits.rxinfo ||
1245 np->rxopt.bits.rxhlim) {
1246 atomic_inc(&skb->users);
1247 req->af.v6_req.pktopts = skb;
1249 req->af.v6_req.iif = sk->sk_bound_dev_if;
1251 /* So that link locals have meaning */
1252 if (!sk->sk_bound_dev_if &&
1253 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1254 req->af.v6_req.iif = tcp_v6_iif(skb);
1257 isn = tcp_v6_init_sequence(sk,skb);
1261 if (tcp_v6_send_synack(sk, req, NULL))
1264 tcp_v6_synq_add(sk, req);
1270 tcp_openreq_free(req);
1272 TCP_INC_STATS_BH(TcpAttemptFails);
1273 return 0; /* don't send reset */
1276 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1277 struct open_request *req,
1278 struct dst_entry *dst)
1280 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1281 struct tcp6_sock *newtcp6sk;
1282 struct inet_opt *newinet;
1283 struct tcp_opt *newtp;
1285 struct ipv6_txoptions *opt;
1287 if (skb->protocol == htons(ETH_P_IP)) {
1292 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1297 newtcp6sk = (struct tcp6_sock *)newsk;
1298 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1300 newinet = inet_sk(newsk);
1301 newnp = inet6_sk(newsk);
1302 newtp = tcp_sk(newsk);
1304 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1306 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1309 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1312 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1314 newtp->af_specific = &ipv6_mapped;
1315 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1316 newnp->pktoptions = NULL;
1318 newnp->mcast_oif = tcp_v6_iif(skb);
1319 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1321 /* Charge newly allocated IPv6 socket. Though it is mapped,
1324 #ifdef INET_REFCNT_DEBUG
1325 atomic_inc(&inet6_sock_nr);
1328 /* It is tricky place. Until this moment IPv4 tcp
1329 worked with IPv6 af_tcp.af_specific.
1332 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1339 #ifdef CONFIG_ACCEPT_QUEUES
1340 if (tcp_acceptq_is_full(sk, req->acceptq_class))
1342 if (tcp_acceptq_is_full(sk))
1346 if (np->rxopt.bits.srcrt == 2 &&
1347 opt == NULL && req->af.v6_req.pktopts) {
1348 struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
1350 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1356 memset(&fl, 0, sizeof(fl));
1357 fl.proto = IPPROTO_TCP;
1358 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1359 if (opt && opt->srcrt) {
1360 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1361 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1363 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1364 fl.oif = sk->sk_bound_dev_if;
1365 fl.fl_ip_dport = req->rmt_port;
1366 fl.fl_ip_sport = inet_sk(sk)->sport;
1368 if (ip6_dst_lookup(sk, &dst, &fl))
1372 newsk = tcp_create_openreq_child(sk, req, skb);
1376 /* Charge newly allocated IPv6 socket */
1377 #ifdef INET_REFCNT_DEBUG
1378 atomic_inc(&inet6_sock_nr);
1381 ip6_dst_store(newsk, dst, NULL);
1382 newsk->sk_route_caps = dst->dev->features &
1383 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1385 newtcp6sk = (struct tcp6_sock *)newsk;
1386 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1388 newtp = tcp_sk(newsk);
1389 newinet = inet_sk(newsk);
1390 newnp = inet6_sk(newsk);
1392 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1394 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1395 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1396 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1397 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1399 /* Now IPv6 options...
1401 First: no IPv4 options.
1403 newinet->opt = NULL;
1406 newnp->rxopt.all = np->rxopt.all;
1408 /* Clone pktoptions received with SYN */
1409 newnp->pktoptions = NULL;
1410 if (req->af.v6_req.pktopts) {
1411 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1413 kfree_skb(req->af.v6_req.pktopts);
1414 req->af.v6_req.pktopts = NULL;
1415 if (newnp->pktoptions)
1416 skb_set_owner_r(newnp->pktoptions, newsk);
1419 newnp->mcast_oif = tcp_v6_iif(skb);
1420 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1422 /* Clone native IPv6 options from listening socket (if any)
1424 Yes, keeping reference count would be much more clever,
1425 but we make one more one thing there: reattach optmem
1429 newnp->opt = ipv6_dup_options(newsk, opt);
1431 sock_kfree_s(sk, opt, opt->tot_len);
1434 newtp->ext_header_len = 0;
1436 newtp->ext_header_len = newnp->opt->opt_nflen +
1437 newnp->opt->opt_flen;
1438 newtp->ext2_header_len = dst->header_len;
1440 tcp_sync_mss(newsk, dst_pmtu(dst));
1441 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1442 tcp_initialize_rcv_mss(newsk);
1444 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1446 __tcp_v6_hash(newsk);
1447 tcp_inherit_port(sk, newsk);
1452 NET_INC_STATS_BH(ListenOverflows);
1454 NET_INC_STATS_BH(ListenDrops);
1455 if (opt && opt != np->opt)
1456 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Verify the TCP checksum of an inbound IPv6 segment, or prepare
 * skb->csum for incremental verification later.
 * NOTE(review): several original lines (returns, braces) are missing
 * from this excerpt, so not every exit path is visible here.
 */
1461 static int tcp_v6_checksum_init(struct sk_buff *skb)
/* The NIC claims it already checksummed the packet: cross-check it. */
1463 if (skb->ip_summed == CHECKSUM_HW) {
1464 skb->ip_summed = CHECKSUM_UNNECESSARY;
1465 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1466 &skb->nh.ipv6h->daddr,skb->csum))
/* Hardware checksum did not verify: log (rate-limited) and fall back. */
1468 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
/* Short packets: cheaper to checksum the whole thing right now. */
1470 if (skb->len <= 76) {
1471 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1472 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1474 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Longer packets: seed skb->csum with the pseudo-header sum so the
 * full checksum can be completed lazily (tcp_checksum_complete). */
1476 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1477 &skb->nh.ipv6h->daddr,0);
1482 /* The socket must have its spinlock held when we get
1485 * We have a potential double-lock case here, so even when
1486 * doing backlog processing we use the BH locking scheme.
1487 * This is because we cannot sleep with the original spinlock
/*
 * Backlog/receive back-end for an IPv6 TCP socket: called with the
 * socket spinlock held (see comment above) and feeds the segment to
 * the TCP state machine. Returns 0 when the skb has been consumed.
 * NOTE(review): this excerpt is missing several original lines
 * (labels such as reset/discard, closing braces), so the control flow
 * shown below is incomplete.
 */
1490 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1492 struct ipv6_pinfo *np = inet6_sk(sk);
1494 struct sk_buff *opt_skb = NULL;
1496 /* Imagine: socket is IPv6. IPv4 packet arrives,
1497 goes to IPv4 receive handler and backlogged.
1498 From backlog it always goes here. Kerboom...
1499 Fortunately, tcp_rcv_established and rcv_established
1500 handle them correctly, but it is not case with
1501 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* v4-mapped traffic on a v6 socket is handed to the IPv4 receive path. */
1504 if (skb->protocol == htons(ETH_P_IP))
1505 return tcp_v4_do_rcv(sk, skb);
/* Attached socket filter (BPF) may drop the segment. */
1507 if (sk_filter(sk, skb, 0))
1511 * socket locking is here for SMP purposes as backlog rcv
1512 * is currently called with bh processing disabled.
1515 /* Do Stevens' IPV6_PKTOPTIONS.
1517 Yes, guys, it is the only place in our code, where we
1518 may make it not affecting IPv4.
1519 The rest of code is protocol independent,
1520 and I do not like idea to uglify IPv4.
1522 Actually, all the idea behind IPV6_PKTOPTIONS
1523 looks not very well thought. For now we latch
1524 options, received in the last packet, enqueued
1525 by tcp. Feel free to propose better solution.
/* Clone the skb so ancillary data can be latched after processing. */
1529 opt_skb = skb_clone(skb, GFP_ATOMIC);
/* Fast path: established connection, hand straight to TCP. */
1531 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1532 TCP_CHECK_TIMER(sk);
1533 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1535 TCP_CHECK_TIMER(sk);
1537 goto ipv6_pktoptions;
/* Truncated header or bad checksum: drop (target label not visible here). */
1541 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
/* Listening socket: may create/resolve a child socket for this segment. */
1544 if (sk->sk_state == TCP_LISTEN) {
1545 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1550 * Queue it on the new socket if the new socket is active,
1551 * otherwise we just shortcircuit this and continue with
1555 if (tcp_child_process(sk, nsk, skb))
1558 __kfree_skb(opt_skb);
/* Slow path: run the full TCP state machine for non-established states. */
1563 TCP_CHECK_TIMER(sk);
1564 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1566 TCP_CHECK_TIMER(sk);
1568 goto ipv6_pktoptions;
/* Error path: answer with RST and count the bad segment. */
1572 tcp_v6_send_reset(skb);
1575 __kfree_skb(opt_skb);
1579 TCP_INC_STATS_BH(TcpInErrs);
1584 /* Do you ask, what is it?
1586 1. skb was enqueued by tcp.
1587 2. skb is added to tail of read queue, rather than out of order.
1588 3. socket is not in passive state.
1589 4. Finally, it really contains options, which user wants to receive.
/* Latch the most recent in-order segment's options for IPV6_PKTOPTIONS. */
1592 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1593 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1594 if (np->rxopt.bits.rxinfo)
1595 np->mcast_oif = tcp_v6_iif(opt_skb);
1596 if (np->rxopt.bits.rxhlim)
1597 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1598 if (ipv6_opt_accepted(sk, opt_skb)) {
1599 skb_set_owner_r(opt_skb, sk);
/* Swap in the new options skb; the old one (if any) is freed below. */
1600 opt_skb = xchg(&np->pktoptions, opt_skb);
1602 __kfree_skb(opt_skb);
1603 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Softirq entry point for inbound IPv6 TCP segments: validates the
 * header and checksum, fills the TCP control block, looks up the
 * owning socket and dispatches (including TIME_WAIT handling).
 * NOTE(review): many original lines (labels such as no_tcp_socket /
 * do_time_wait, braces) are missing from this excerpt.
 */
1612 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1614 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are processed. */
1619 if (skb->pkt_type != PACKET_HOST)
1623 * Count it even if it's bad.
1625 TCP_INC_STATS_BH(TcpInSegs);
/* Make sure the basic and full TCP header are in linear skb data. */
1627 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1632 if (th->doff < sizeof(struct tcphdr)/4)
1634 if (!pskb_may_pull(skb, th->doff*4))
1637 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1638 tcp_v6_checksum_init(skb) < 0))
/* Pre-compute sequence bookkeeping in the skb control block. */
1642 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1643 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1644 skb->len - th->doff*4);
1645 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1646 TCP_SKB_CB(skb)->when = 0;
1647 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1648 TCP_SKB_CB(skb)->sacked = 0;
/* Find the socket for this 4-tuple (+ interface for link-local). */
1650 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1651 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1657 if (sk->sk_state == TCP_TIME_WAIT)
/* IPsec policy and socket-filter checks before queueing. */
1660 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1661 goto discard_and_relse;
1663 if (sk_filter(sk, skb, 0))
1664 goto discard_and_relse;
/* If the socket is not owned by a process, process now (or prequeue);
 * otherwise defer the segment to the socket backlog. */
1670 if (!sock_owned_by_user(sk)) {
1671 if (!tcp_prequeue(sk, skb))
1672 ret = tcp_v6_do_rcv(sk, skb);
1674 sk_add_backlog(sk, skb);
1678 return ret ? -1 : 0;
/* No socket found: policy-check, validate, then answer with RST. */
1681 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1684 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1686 TCP_INC_STATS_BH(TcpInErrs);
1688 tcp_v6_send_reset(skb);
/* TIME_WAIT bucket handling: drop the tw reference on every error path. */
1705 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1706 tcp_tw_put((struct tcp_tw_bucket *) sk);
1710 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1711 TCP_INC_STATS_BH(TcpInErrs);
1712 tcp_tw_put((struct tcp_tw_bucket *) sk);
1716 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1717 skb, th, skb->len)) {
/* A new SYN may revive the connection on a listening socket. */
1722 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1724 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1725 tcp_tw_put((struct tcp_tw_bucket *)sk);
1729 /* Fall through to ACK */
1732 tcp_v6_timewait_ack(sk, skb);
1736 case TCP_TW_SUCCESS:;
/*
 * Re-validate (and if necessary re-create) the cached route for a
 * connected IPv6 TCP socket after the old dst became obsolete.
 * Returns 0 on success; on lookup failure sk_route_caps is cleared
 * (error return lines are missing from this excerpt).
 */
1741 static int tcp_v6_rebuild_header(struct sock *sk)
1744 struct dst_entry *dst;
1745 struct ipv6_pinfo *np = inet6_sk(sk);
/* Fast path: the cached dst may still be valid for our cookie. */
1747 dst = __sk_dst_check(sk, np->dst_cookie);
1750 struct inet_opt *inet = inet_sk(sk);
/* Rebuild the flow description for a fresh route lookup. */
1753 memset(&fl, 0, sizeof(fl));
1754 fl.proto = IPPROTO_TCP;
1755 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1756 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1757 fl.fl6_flowlabel = np->flow_label;
1758 fl.oif = sk->sk_bound_dev_if;
1759 fl.fl_ip_dport = inet->dport;
1760 fl.fl_ip_sport = inet->sport;
/* With a routing header, route towards the first intermediate hop. */
1762 if (np->opt && np->opt->srcrt) {
1763 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1764 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1767 err = ip6_dst_lookup(sk, &dst, &fl);
/* Lookup failed: disable route-derived offload capabilities. */
1770 sk->sk_route_caps = 0;
/* Cache the new route and refresh offload/header-length state. */
1774 ip6_dst_store(sk, dst, NULL);
1775 sk->sk_route_caps = dst->dev->features &
1776 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1777 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * Per-segment transmit hook for IPv6 TCP: builds the flow, reuses or
 * refreshes the cached route, then hands the skb to ip6_xmit().
 */
1783 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1785 struct sock *sk = skb->sk;
1786 struct inet_opt *inet = inet_sk(sk);
1787 struct ipv6_pinfo *np = inet6_sk(sk);
1789 struct dst_entry *dst;
/* Describe the outgoing flow for routing. */
1791 memset(&fl, 0, sizeof(fl));
1792 fl.proto = IPPROTO_TCP;
1793 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1794 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1795 fl.fl6_flowlabel = np->flow_label;
/* Fold the socket's ECN state into the flow label. */
1796 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1797 fl.oif = sk->sk_bound_dev_if;
1798 fl.fl_ip_sport = inet->sport;
1799 fl.fl_ip_dport = inet->dport;
/* With a routing header, route towards the first intermediate hop. */
1801 if (np->opt && np->opt->srcrt) {
1802 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1803 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Reuse the cached route if still valid, else look one up. */
1806 dst = __sk_dst_check(sk, np->dst_cookie);
1809 int err = ip6_dst_lookup(sk, &dst, &fl);
/* Lookup failed: record the soft error (error return not visible here). */
1812 sk->sk_err_soft = -err;
1816 ip6_dst_store(sk, dst, NULL);
1817 sk->sk_route_caps = dst->dev->features &
1818 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1819 tcp_sk(sk)->ext2_header_len = dst->header_len;
1822 skb->dst = dst_clone(dst);
1824 /* Restore final destination back after routing done */
1825 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1827 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1830 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1832 struct ipv6_pinfo *np = inet6_sk(sk);
1833 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1835 sin6->sin6_family = AF_INET6;
1836 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1837 sin6->sin6_port = inet_sk(sk)->dport;
1838 /* We do not store received flowlabel for TCP */
1839 sin6->sin6_flowinfo = 0;
1840 sin6->sin6_scope_id = 0;
1841 if (sk->sk_bound_dev_if &&
1842 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1843 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * Hook for caching peer timestamps on close; there is no IPv6 peer
 * cache yet, so always report "nothing remembered".
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
/*
 * AF-specific operations vector for TCP sockets speaking native IPv6.
 * NOTE(review): some member lines are missing from this excerpt.
 */
1852 static struct tcp_func ipv6_specific = {
1853 .queue_xmit = tcp_v6_xmit,
1854 .send_check = tcp_v6_send_check,
1855 .rebuild_header = tcp_v6_rebuild_header,
1856 .conn_request = tcp_v6_conn_request,
1857 .syn_recv_sock = tcp_v6_syn_recv_sock,
1858 .remember_stamp = tcp_v6_remember_stamp,
1859 .net_header_len = sizeof(struct ipv6hdr),
1861 .setsockopt = ipv6_setsockopt,
1862 .getsockopt = ipv6_getsockopt,
1863 .addr2sockaddr = v6_addr2sockaddr,
1864 .sockaddr_len = sizeof(struct sockaddr_in6)
1868 * TCP over IPv4 via INET6 API
/*
 * Operations vector for v4-mapped sockets: IPv4 transmit/checksum
 * paths combined with IPv6 socket-option and address handling.
 */
1871 static struct tcp_func ipv6_mapped = {
1872 .queue_xmit = ip_queue_xmit,
1873 .send_check = tcp_v4_send_check,
1874 .rebuild_header = tcp_v4_rebuild_header,
1875 .conn_request = tcp_v6_conn_request,
1876 .syn_recv_sock = tcp_v6_syn_recv_sock,
1877 .remember_stamp = tcp_v4_remember_stamp,
1878 .net_header_len = sizeof(struct iphdr),
1880 .setsockopt = ipv6_setsockopt,
1881 .getsockopt = ipv6_getsockopt,
1882 .addr2sockaddr = v6_addr2sockaddr,
1883 .sockaddr_len = sizeof(struct sockaddr_in6)
1888 /* NOTE: A lot of things set to zero explicitly by call to
1889 * sk_alloc() so need not be done here.
/*
 * Per-socket initialisation for IPv6 TCP sockets: queues, timers and
 * protocol defaults. Returns 0 (return line not visible in excerpt).
 */
1891 static int tcp_v6_init_sock(struct sock *sk)
1893 struct tcp_opt *tp = tcp_sk(sk);
1895 skb_queue_head_init(&tp->out_of_order_queue);
1896 tcp_init_xmit_timers(sk);
1897 tcp_prequeue_init(tp);
/* Conservative initial RTO/RTT variance until real samples arrive. */
1899 tp->rto = TCP_TIMEOUT_INIT;
1900 tp->mdev = TCP_TIMEOUT_INIT;
1902 /* So many TCP implementations out there (incorrectly) count the
1903 * initial SYN frame in their delayed-ACK and congestion control
1904 * algorithms that we must have the following bandaid to talk
1905 * efficiently to them. -DaveM
1909 /* See draft-stevens-tcpca-spec-01 for discussion of the
1910 * initialization of these values.
1912 tp->snd_ssthresh = 0x7fffffff;
1913 tp->snd_cwnd_clamp = ~0;
/* Default MSS before path discovery (RFC 1122 minimum-derived value). */
1914 tp->mss_cache = 536;
1916 tp->reordering = sysctl_tcp_reordering;
1918 sk->sk_state = TCP_CLOSE;
/* Native IPv6 ops by default; may be swapped to ipv6_mapped later. */
1920 tp->af_specific = &ipv6_specific;
1922 sk->sk_write_space = tcp_write_space;
1923 sk->sk_use_write_queue = 1;
/* Default buffer sizes from the tcp_{w,r}mem sysctls. */
1925 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1926 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1928 atomic_inc(&tcp_sockets_allocated);
/*
 * Tear down an IPv6 TCP socket: stop timers, purge queues, release
 * the bind bucket and cached page, then finish with the generic
 * inet6 destructor.
 */
1933 static int tcp_v6_destroy_sock(struct sock *sk)
1935 struct tcp_opt *tp = tcp_sk(sk);
1936 struct inet_opt *inet = inet_sk(sk);
1938 tcp_clear_xmit_timers(sk);
1940 /* Cleanup up the write buffer. */
1941 tcp_writequeue_purge(sk);
1943 /* Cleans up our, hopefully empty, out_of_order_queue. */
1944 __skb_queue_purge(&tp->out_of_order_queue);
1946 /* Clean prequeue, it must be empty really */
1947 __skb_queue_purge(&tp->ucopy.prequeue);
1949 /* Clean up a referenced TCP bind bucket. */
1950 if (tcp_sk(sk)->bind_hash)
1953 /* If sendmsg cached page exists, toss it. */
1954 if (inet->sndmsg_page != NULL)
1955 __free_page(inet->sndmsg_page);
/* Balances the atomic_inc in tcp_v6_init_sock. */
1957 atomic_dec(&tcp_sockets_allocated);
1959 return inet6_destroy_sock(sk);
1962 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one pending open_request (SYN_RECV) as a /proc/net/tcp6 row.
 * NOTE(review): the seq_printf() call line itself is missing from
 * this excerpt; only its format string and arguments are visible.
 */
1963 static void get_openreq6(struct seq_file *seq,
1964 struct sock *sk, struct open_request *req, int i, int uid)
1966 struct in6_addr *dest, *src;
/* Remaining SYN-ACK retransmit time, in jiffies. */
1967 int ttd = req->expires - jiffies;
1972 src = &req->af.v6_req.loc_addr;
1973 dest = &req->af.v6_req.rmt_addr;
1975 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1976 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1978 src->s6_addr32[0], src->s6_addr32[1],
1979 src->s6_addr32[2], src->s6_addr32[3],
1980 ntohs(inet_sk(sk)->sport),
1981 dest->s6_addr32[0], dest->s6_addr32[1],
1982 dest->s6_addr32[2], dest->s6_addr32[3],
1983 ntohs(req->rmt_port),
1985 0,0, /* could print option size, but that is af dependent. */
1986 1, /* timers active (only the expire timer) */
1987 jiffies_to_clock_t(ttd),
1990 0, /* non standard timer */
1991 0, /* open_requests have no inode */
/*
 * Format one full TCP socket as a /proc/net/tcp6 row (addresses,
 * queue sizes, timer state, congestion data).
 * NOTE(review): the seq_printf() call line and some argument lines
 * are missing from this excerpt.
 */
1995 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1997 struct in6_addr *dest, *src;
2000 unsigned long timer_expires;
2001 struct inet_opt *inet = inet_sk(sp);
2002 struct tcp_opt *tp = tcp_sk(sp);
2003 struct ipv6_pinfo *np = inet6_sk(sp);
2006 src = &np->rcv_saddr;
2007 destp = ntohs(inet->dport);
2008 srcp = ntohs(inet->sport);
/* Pick whichever timer is pending for the "tm->when" column. */
2009 if (tp->pending == TCP_TIME_RETRANS) {
2011 timer_expires = tp->timeout;
2012 } else if (tp->pending == TCP_TIME_PROBE0) {
2014 timer_expires = tp->timeout;
2015 } else if (timer_pending(&sp->sk_timer)) {
2017 timer_expires = sp->sk_timer.expires;
/* No timer pending: report zero remaining time. */
2020 timer_expires = jiffies;
2024 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2025 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2027 src->s6_addr32[0], src->s6_addr32[1],
2028 src->s6_addr32[2], src->s6_addr32[3], srcp,
2029 dest->s6_addr32[0], dest->s6_addr32[1],
2030 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2032 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2034 jiffies_to_clock_t(timer_expires - jiffies),
2039 atomic_read(&sp->sk_refcnt), sp,
2040 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2041 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Format one TIME_WAIT bucket as a /proc/net/tcp6 row.
 * NOTE(review): the seq_printf() call line is missing from this
 * excerpt; only its format string and arguments are visible.
 */
2045 static void get_timewait6_sock(struct seq_file *seq,
2046 struct tcp_tw_bucket *tw, int i)
2048 struct in6_addr *dest, *src;
/* Remaining TIME_WAIT lifetime, in jiffies. */
2050 int ttd = tw->tw_ttd - jiffies;
2055 dest = &tw->tw_v6_daddr;
2056 src = &tw->tw_v6_rcv_saddr;
2057 destp = ntohs(tw->tw_dport);
2058 srcp = ntohs(tw->tw_sport);
2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2064 src->s6_addr32[0], src->s6_addr32[1],
2065 src->s6_addr32[2], src->s6_addr32[3], srcp,
2066 dest->s6_addr32[0], dest->s6_addr32[1],
2067 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2068 tw->tw_substate, 0, 0,
2069 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2070 atomic_read(&tw->tw_refcnt), tw);
2073 #ifdef CONFIG_PROC_FS
/*
 * seq_file show() callback for /proc/net/tcp6: prints the header
 * line for the start token, otherwise dispatches on iterator state
 * to the per-entry formatter.
 */
2074 static int tcp6_seq_show(struct seq_file *seq, void *v)
2076 struct tcp_iter_state *st;
/* First call in a read: emit the column header. */
2078 if (v == SEQ_START_TOKEN) {
2083 "st tx_queue rx_queue tr tm->when retrnsmt"
2084 " uid timeout inode\n");
2089 switch (st->state) {
2090 case TCP_SEQ_STATE_LISTENING:
2091 case TCP_SEQ_STATE_ESTABLISHED:
2092 get_tcp6_sock(seq, v, st->num);
2094 case TCP_SEQ_STATE_OPENREQ:
2095 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2097 case TCP_SEQ_STATE_TIME_WAIT:
2098 get_timewait6_sock(seq, v, st->num);
/* seq_file plumbing for /proc/net/tcp6; fops filled by tcp_proc_register. */
2105 static struct file_operations tcp6_seq_fops;
2106 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2107 .owner = THIS_MODULE,
2110 .seq_show = tcp6_seq_show,
2111 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 seq_file interface at boot. */
2114 int __init tcp6_proc_init(void)
2116 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister the /proc/net/tcp6 interface (module unload path). */
2119 void tcp6_proc_exit(void)
2121 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * Transport protocol descriptor wiring the socket-layer operations
 * for AF_INET6/SOCK_STREAM to the TCP implementation.
 * NOTE(review): some member lines are missing from this excerpt.
 */
2125 struct proto tcpv6_prot = {
2128 .connect = tcp_v6_connect,
2129 .disconnect = tcp_disconnect,
2130 .accept = tcp_accept,
2132 .init = tcp_v6_init_sock,
2133 .destroy = tcp_v6_destroy_sock,
2134 .shutdown = tcp_shutdown,
2135 .setsockopt = tcp_setsockopt,
2136 .getsockopt = tcp_getsockopt,
2137 .sendmsg = tcp_sendmsg,
2138 .recvmsg = tcp_recvmsg,
2139 .backlog_rcv = tcp_v6_do_rcv,
2140 .hash = tcp_v6_hash,
2141 .unhash = tcp_unhash,
2142 .get_port = tcp_v6_get_port,
/* IPv6 protocol handler registered for IPPROTO_TCP packet delivery. */
2145 static struct inet6_protocol tcpv6_protocol = {
2146 .handler = tcp_v6_rcv,
2147 .err_handler = tcp_v6_err,
2148 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2151 extern struct proto_ops inet6_stream_ops;
/* inet6 protocol-switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6. */
2153 static struct inet_protosw tcpv6_protosw = {
2154 .type = SOCK_STREAM,
2155 .protocol = IPPROTO_TCP,
2156 .prot = &tcpv6_prot,
2157 .ops = &inet6_stream_ops,
2160 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time registration of TCPv6 with the IPv6 stack.
 * NOTE(review): on inet6_add_protocol() failure this only logs and
 * still registers the protosw below — confirm that is intentional.
 */
2163 void __init tcpv6_init(void)
2165 /* register inet6 protocol */
2166 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2167 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2168 inet6_register_protosw(&tcpv6_protosw);