3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
59 #include <asm/uaccess.h>
61 #include <linux/proc_fs.h>
62 #include <linux/seq_file.h>
/* Forward declarations for file-local functions and the two tcp_func
 * dispatch tables (ipv6_mapped handles v4-mapped sockets, ipv6_specific
 * native IPv6).  NOTE(review): lines in this chunk appear elided and each
 * line is prefixed with a stray original line number — restore from the
 * upstream tcp_ipv6.c before editing. */
64 static void tcp_v6_send_reset(struct sk_buff *skb);
65 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
66 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
69 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
70 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
72 static struct tcp_func ipv6_mapped;
73 static struct tcp_func ipv6_specific;
/* NOTE(review): established-hash bucket index from the XOR of ports and the
 * low 32 bits of both addresses, masked to tcp_ehash_size (must be a power
 * of two for the mask to work).  This chunk has lines elided (braces
 * missing); code kept byte-identical. */
75 /* I have no idea if this is a good hash for v6 or not. -DaveM */
76 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
77 struct in6_addr *faddr, u16 fport)
79 int hashent = (lport ^ fport);
81 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
82 hashent ^= hashent>>16;
83 hashent ^= hashent>>8;
84 return (hashent & (tcp_ehash_size - 1));
/* NOTE(review): convenience wrapper — pulls the local/remote address and
 * port 4-tuple out of the socket and feeds it to tcp_v6_hashfn().
 * Lines elided in this view; code kept byte-identical. */
87 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
89 struct inet_opt *inet = inet_sk(sk);
90 struct ipv6_pinfo *np = inet6_sk(sk);
91 struct in6_addr *laddr = &np->rcv_saddr;
92 struct in6_addr *faddr = &np->daddr;
93 __u16 lport = inet->num;
94 __u16 fport = inet->dport;
95 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/* NOTE(review): walks the bind-bucket owner list to decide whether sk may
 * share the port — a conflict needs matching (or wildcard) bound device,
 * incompatible SO_REUSEADDR state, and equal rcv_saddr.  The loop body and
 * return statements are elided in this view; do not edit without the full
 * upstream source. */
98 static inline int tcp_v6_bind_conflict(struct sock *sk,
99 struct tcp_bind_bucket *tb)
102 struct hlist_node *node;
104 /* We must walk the whole port owner list in this case. -DaveM */
105 sk_for_each_bound(sk2, node, &tb->owners) {
107 (!sk->sk_bound_dev_if ||
108 !sk2->sk_bound_dev_if ||
109 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
110 (!sk->sk_reuse || !sk2->sk_reuse ||
111 sk2->sk_state == TCP_LISTEN) &&
112 ipv6_rcv_saddr_equal(sk, sk2))
/* NOTE(review): local port allocation/binding.  snum==0 presumably means
 * "pick an ephemeral port" via the tcp_port_rover search under
 * tcp_portalloc_lock — TODO confirm against upstream; the do/while opener,
 * rover increment, goto labels and success/fail paths are all elided in
 * this view.  Code kept byte-identical. */
119 /* Grrr, addr_type already calculated by caller, but I don't want
120 * to add some silly "cookie" argument to this method just for that.
121 * But it doesn't matter, the recalculation is in the rarest path
122 * this function ever takes.
124 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
126 struct tcp_bind_hashbucket *head;
127 struct tcp_bind_bucket *tb;
128 struct hlist_node *node;
133 int low = sysctl_local_port_range[0];
134 int high = sysctl_local_port_range[1];
135 int remaining = (high - low) + 1;
138 spin_lock(&tcp_portalloc_lock);
139 rover = tcp_port_rover;
141 if ((rover < low) || (rover > high))
143 head = &tcp_bhash[tcp_bhashfn(rover)];
144 spin_lock(&head->lock);
145 tb_for_each(tb, node, &head->chain)
146 if (tb->port == rover)
150 spin_unlock(&head->lock);
151 } while (--remaining > 0);
152 tcp_port_rover = rover;
153 spin_unlock(&tcp_portalloc_lock);
155 /* Exhausted local port range during search? */
160 /* OK, here is the one we will use. */
163 head = &tcp_bhash[tcp_bhashfn(snum)];
164 spin_lock(&head->lock);
165 tb_for_each(tb, node, &head->chain)
166 if (tb->port == snum)
172 if (tb && !hlist_empty(&tb->owners)) {
173 if (tb->fastreuse > 0 && sk->sk_reuse &&
174 sk->sk_state != TCP_LISTEN) {
178 if (tcp_v6_bind_conflict(sk, tb))
184 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
186 if (hlist_empty(&tb->owners)) {
187 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
191 } else if (tb->fastreuse &&
192 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
196 if (!tcp_sk(sk)->bind_hash)
197 tcp_bind_hash(sk, tb, snum);
198 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
202 spin_unlock(&head->lock);
/* NOTE(review): inserts an unhashed socket into either the listening hash
 * or the established hash (bucket chosen by tcp_v6_sk_hashfn) and bumps
 * the protocol use count.  The lock declaration, else-branch brace and the
 * write_lock/unlock calls are elided in this view; kept byte-identical. */
208 static __inline__ void __tcp_v6_hash(struct sock *sk)
210 struct hlist_head *list;
213 BUG_TRAP(sk_unhashed(sk));
215 if (sk->sk_state == TCP_LISTEN) {
216 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
217 lock = &tcp_lhash_lock;
220 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
221 list = &tcp_ehash[sk->sk_hashent].chain;
222 lock = &tcp_ehash[sk->sk_hashent].lock;
226 __sk_add_node(sk, list);
227 sock_prot_inc_use(sk->sk_prot);
/* NOTE(review): public hash entry point; skips TCP_CLOSE sockets and
 * presumably dispatches v4-mapped sockets to the IPv4 hash path when
 * af_specific == &ipv6_mapped — the body after this check is elided in
 * this view, so confirm against upstream before editing. */
232 static void tcp_v6_hash(struct sock *sk)
234 if (sk->sk_state != TCP_CLOSE) {
235 struct tcp_opt *tp = tcp_sk(sk);
237 if (tp->af_specific == &ipv6_mapped) {
/* NOTE(review): scores listeners on (port, family, rcv_saddr, bound dev)
 * under tcp_lhash_lock and returns the best match; the score accounting
 * and "exact match" short-circuit lines are elided in this view.
 * Kept byte-identical. */
247 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
250 struct hlist_node *node;
251 struct sock *result = NULL;
255 read_lock(&tcp_lhash_lock);
256 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
257 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
258 struct ipv6_pinfo *np = inet6_sk(sk);
261 if (!ipv6_addr_any(&np->rcv_saddr)) {
262 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
266 if (sk->sk_bound_dev_if) {
267 if (sk->sk_bound_dev_if != dif)
275 if (score > hiscore) {
283 read_unlock(&tcp_lhash_lock);
/* NOTE(review): established-table lookup — scans the ehash bucket for an
 * exact 4-tuple match, then the companion TIME_WAIT chain at
 * (head + tcp_ehash_size).  The hit label, sock_hold and return paths are
 * elided in this view; code kept byte-identical. */
287 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
288 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
290 * The sockhash lock must be held as a reader here.
293 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
294 struct in6_addr *daddr, u16 hnum,
297 struct tcp_ehash_bucket *head;
299 struct hlist_node *node;
300 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
303 /* Optimize here for direct hit, only listening connections can
304 * have wildcards anyways.
306 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
307 head = &tcp_ehash[hash];
308 read_lock(&head->lock);
309 sk_for_each(sk, node, &head->chain) {
310 /* For IPV6 do the cheaper port and family tests first. */
311 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
312 goto hit; /* You sunk my battleship! */
314 /* Must check for a TIME_WAIT'er before going to listener hash. */
315 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
316 /* FIXME: acme: check this... */
317 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
319 if(*((__u32 *)&(tw->tw_dport)) == ports &&
320 sk->sk_family == PF_INET6) {
321 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
322 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
323 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
327 read_unlock(&head->lock);
332 read_unlock(&head->lock);
/* NOTE(review): combined lookup — established table first, listener table
 * as fallback.  The intermediate "if (sk) return sk" line is elided in
 * this view; kept byte-identical. */
337 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
338 struct in6_addr *daddr, u16 hnum,
343 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
348 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* NOTE(review): exported wrapper; converts the wire-order dport with
 * ntohs() before delegating.  Locking and return lines elided in this
 * view; kept byte-identical. */
351 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
352 struct in6_addr *daddr, u16 dport,
358 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
/* NOTE(review): SYN-queue bucket index — jhash-style mix of the four
 * 32-bit words of the remote address (plus rport/rnd, whose additions are
 * elided in this view) masked to TCP_SYNQ_HSIZE.  Kept byte-identical. */
366 * Open request hash tables.
369 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
373 a = raddr->s6_addr32[0];
374 b = raddr->s6_addr32[1];
375 c = raddr->s6_addr32[2];
377 a += JHASH_GOLDEN_RATIO;
378 b += JHASH_GOLDEN_RATIO;
380 __jhash_mix(a, b, c);
382 a += raddr->s6_addr32[3];
384 __jhash_mix(a, b, c);
386 return c & (TCP_SYNQ_HSIZE - 1);
/* NOTE(review): finds a pending open_request matching (rport, raddr,
 * laddr, iif) in the listener's SYN table, returning it and, via *prevp,
 * the link needed for later unlinking.  The *prevp assignment and return
 * lines are elided in this view; kept byte-identical. */
389 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
390 struct open_request ***prevp,
392 struct in6_addr *raddr,
393 struct in6_addr *laddr,
396 struct tcp_listen_opt *lopt = tp->listen_opt;
397 struct open_request *req, **prev;
399 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
400 (req = *prev) != NULL;
401 prev = &req->dl_next) {
402 if (req->rmt_port == rport &&
403 req->class->family == AF_INET6 &&
404 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
405 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
406 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
407 BUG_TRAP(req->sk == NULL);
/* NOTE(review): TCP-over-IPv6 pseudo-header checksum; thin wrapper around
 * csum_ipv6_magic().  The 'base' parameter line is elided in this view;
 * kept byte-identical. */
416 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
417 struct in6_addr *saddr,
418 struct in6_addr *daddr,
421 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* NOTE(review): picks the secure ISN generator by wire protocol — IPv6
 * packets use secure_tcpv6_sequence_number(), anything else falls through
 * to the IPv4 variant (v4-mapped case).  Trailing argument lines elided in
 * this view; kept byte-identical. */
424 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
426 if (skb->protocol == htons(ETH_P_IPV6)) {
427 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
428 skb->nh.ipv6h->saddr.s6_addr32,
432 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* NOTE(review): connect-time uniqueness check under the ehash bucket
 * write-lock.  A TIME_WAIT socket with a recent timestamp is presumably
 * recycled (write_seq advanced past its snd_nxt, timestamps inherited) —
 * TODO confirm; the unique/not_unique labels, the EADDRNOTAVAIL goto and
 * the tw_put path are elided in this view.  Kept byte-identical. */
439 static int tcp_v6_check_established(struct sock *sk)
441 struct inet_opt *inet = inet_sk(sk);
442 struct ipv6_pinfo *np = inet6_sk(sk);
443 struct in6_addr *daddr = &np->rcv_saddr;
444 struct in6_addr *saddr = &np->daddr;
445 int dif = sk->sk_bound_dev_if;
446 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
447 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
448 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
450 struct hlist_node *node;
451 struct tcp_tw_bucket *tw;
453 write_lock_bh(&head->lock);
455 /* Check TIME-WAIT sockets first. */
456 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
457 tw = (struct tcp_tw_bucket*)sk2;
459 if(*((__u32 *)&(tw->tw_dport)) == ports &&
460 sk2->sk_family == PF_INET6 &&
461 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
462 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
463 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
464 struct tcp_opt *tp = tcp_sk(sk);
466 if (tw->tw_ts_recent_stamp) {
467 /* See comment in tcp_ipv4.c */
468 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
471 tp->ts_recent = tw->tw_ts_recent;
472 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
481 /* And established part... */
482 sk_for_each(sk2, node, &head->chain) {
483 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
488 BUG_TRAP(sk_unhashed(sk));
489 __sk_add_node(sk, &head->chain);
490 sk->sk_hashent = hash;
491 sock_prot_inc_use(sk->sk_prot);
492 write_unlock_bh(&head->lock);
495 /* Silly. Should hash-dance instead... */
497 tcp_tw_deschedule(tw);
498 NET_INC_STATS_BH(TimeWaitRecycled);
506 write_unlock_bh(&head->lock);
507 return -EADDRNOTAVAIL;
/* NOTE(review): binds an ephemeral port if needed, then either fast-paths
 * (sole owner of the bind bucket — hashed directly, lines elided here) or
 * falls back to the full tcp_v6_check_established() uniqueness check.
 * tb assignment and the fast-path __tcp_v6_hash call are elided in this
 * view; kept byte-identical. */
510 static int tcp_v6_hash_connect(struct sock *sk)
512 struct tcp_bind_hashbucket *head;
513 struct tcp_bind_bucket *tb;
516 if (inet_sk(sk)->num == 0) {
517 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
520 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
523 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
526 spin_lock_bh(&head->lock);
528 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
530 spin_unlock_bh(&head->lock);
533 spin_unlock_bh(&head->lock);
534 return tcp_v6_check_established(sk);
/* NOTE(review): incoming interface index stashed in the skb's IPv6
 * control block.  Braces elided in this view; kept byte-identical. */
538 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
540 return IP6CB(skb)->iif;
/* NOTE(review): active connect for an IPv6 TCP socket — validates the
 * sockaddr, resolves flow labels, handles sin6_scope_id for link-local
 * destinations, detours v4-mapped destinations through tcp_v4_connect()
 * (swapping af_specific/backlog_rcv around the call), routes via
 * ip6_dst_lookup(), then hashes and sends the SYN.  Many error gotos,
 * the failure label and several closing braces are elided in this view —
 * edit only against the full upstream source.  Kept byte-identical. */
543 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
546 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
547 struct inet_opt *inet = inet_sk(sk);
548 struct ipv6_pinfo *np = inet6_sk(sk);
549 struct tcp_opt *tp = tcp_sk(sk);
550 struct in6_addr *saddr = NULL;
552 struct dst_entry *dst;
556 if (addr_len < SIN6_LEN_RFC2133)
559 if (usin->sin6_family != AF_INET6)
560 return(-EAFNOSUPPORT);
562 memset(&fl, 0, sizeof(fl));
565 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
566 IP6_ECN_flow_init(fl.fl6_flowlabel);
567 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
568 struct ip6_flowlabel *flowlabel;
569 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
570 if (flowlabel == NULL)
572 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
573 fl6_sock_release(flowlabel);
578 * connect() to INADDR_ANY means loopback (BSD'ism).
581 if(ipv6_addr_any(&usin->sin6_addr))
582 usin->sin6_addr.s6_addr[15] = 0x1;
584 addr_type = ipv6_addr_type(&usin->sin6_addr);
586 if(addr_type & IPV6_ADDR_MULTICAST)
589 if (addr_type&IPV6_ADDR_LINKLOCAL) {
590 if (addr_len >= sizeof(struct sockaddr_in6) &&
591 usin->sin6_scope_id) {
592 /* If interface is set while binding, indices
595 if (sk->sk_bound_dev_if &&
596 sk->sk_bound_dev_if != usin->sin6_scope_id)
599 sk->sk_bound_dev_if = usin->sin6_scope_id;
602 /* Connect to link-local address requires an interface */
603 if (!sk->sk_bound_dev_if)
607 if (tp->ts_recent_stamp &&
608 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
610 tp->ts_recent_stamp = 0;
614 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
615 np->flow_label = fl.fl6_flowlabel;
621 if (addr_type == IPV6_ADDR_MAPPED) {
622 u32 exthdrlen = tp->ext_header_len;
623 struct sockaddr_in sin;
625 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
627 if (__ipv6_only_sock(sk))
630 sin.sin_family = AF_INET;
631 sin.sin_port = usin->sin6_port;
632 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
634 tp->af_specific = &ipv6_mapped;
635 sk->sk_backlog_rcv = tcp_v4_do_rcv;
637 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
640 tp->ext_header_len = exthdrlen;
641 tp->af_specific = &ipv6_specific;
642 sk->sk_backlog_rcv = tcp_v6_do_rcv;
645 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
647 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
654 if (!ipv6_addr_any(&np->rcv_saddr))
655 saddr = &np->rcv_saddr;
657 fl.proto = IPPROTO_TCP;
658 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
659 ipv6_addr_copy(&fl.fl6_src,
660 (saddr ? saddr : &np->saddr));
661 fl.oif = sk->sk_bound_dev_if;
662 fl.fl_ip_dport = usin->sin6_port;
663 fl.fl_ip_sport = inet->sport;
665 if (np->opt && np->opt->srcrt) {
666 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
667 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
670 err = ip6_dst_lookup(sk, &dst, &fl);
677 ipv6_addr_copy(&np->rcv_saddr, saddr);
680 /* set the source address */
681 ipv6_addr_copy(&np->saddr, saddr);
682 inet->rcv_saddr = LOOPBACK4_IPV6;
684 ip6_dst_store(sk, dst, NULL);
685 sk->sk_route_caps = dst->dev->features &
686 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
688 tp->ext_header_len = 0;
690 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
691 tp->ext2_header_len = dst->header_len;
693 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
695 inet->dport = usin->sin6_port;
697 tcp_set_state(sk, TCP_SYN_SENT);
698 err = tcp_v6_hash_connect(sk);
703 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
708 err = tcp_connect(sk);
715 tcp_set_state(sk, TCP_CLOSE);
719 sk->sk_route_caps = 0;
/* NOTE(review): ICMPv6 error handler.  Looks up the affected socket,
 * validates the echoed sequence number against the send window, handles
 * PKT_TOOBIG by re-routing and calling tcp_sync_mss()/
 * tcp_simple_retransmit(), then per-state error propagation (SYN_SENT
 * open_request path, established recverr path).  The bh_lock/unlock,
 * case labels, and the 'out' label are elided in this view; kept
 * byte-identical. */
723 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
724 int type, int code, int offset, __u32 info)
726 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
727 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
728 struct ipv6_pinfo *np;
734 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
737 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), Icmp6InErrors);
741 if (sk->sk_state == TCP_TIME_WAIT) {
742 tcp_tw_put((struct tcp_tw_bucket*)sk);
747 if (sock_owned_by_user(sk))
748 NET_INC_STATS_BH(LockDroppedIcmps);
750 if (sk->sk_state == TCP_CLOSE)
754 seq = ntohl(th->seq);
755 if (sk->sk_state != TCP_LISTEN &&
756 !between(seq, tp->snd_una, tp->snd_nxt)) {
757 NET_INC_STATS_BH(OutOfWindowIcmps);
763 if (type == ICMPV6_PKT_TOOBIG) {
764 struct dst_entry *dst = NULL;
766 if (sock_owned_by_user(sk))
768 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
771 /* icmp should have updated the destination cache entry */
772 dst = __sk_dst_check(sk, np->dst_cookie);
775 struct inet_opt *inet = inet_sk(sk);
778 /* BUGGG_FUTURE: Again, it is not clear how
779 to handle rthdr case. Ignore this complexity
782 memset(&fl, 0, sizeof(fl));
783 fl.proto = IPPROTO_TCP;
784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
785 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
786 fl.oif = sk->sk_bound_dev_if;
787 fl.fl_ip_dport = inet->dport;
788 fl.fl_ip_sport = inet->sport;
790 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
791 sk->sk_err_soft = -err;
797 if (tp->pmtu_cookie > dst_pmtu(dst)) {
798 tcp_sync_mss(sk, dst_pmtu(dst));
799 tcp_simple_retransmit(sk);
800 } /* else let the usual retransmit timer handle it */
805 icmpv6_err_convert(type, code, &err);
807 /* Might be for an open_request */
808 switch (sk->sk_state) {
809 struct open_request *req, **prev;
811 if (sock_owned_by_user(sk))
814 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
815 &hdr->saddr, tcp_v6_iif(skb));
819 /* ICMPs are not backlogged, hence we cannot get
820 * an established socket here.
822 BUG_TRAP(req->sk == NULL);
824 if (seq != req->snt_isn) {
825 NET_INC_STATS_BH(OutOfWindowIcmps);
829 tcp_synq_drop(sk, req, prev);
833 case TCP_SYN_RECV: /* Cannot happen.
834 It can, it SYNs are crossed. --ANK */
835 if (!sock_owned_by_user(sk)) {
836 TCP_INC_STATS_BH(TcpAttemptFails);
838 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
842 sk->sk_err_soft = err;
846 if (!sock_owned_by_user(sk) && np->recverr) {
848 sk->sk_error_report(sk);
850 sk->sk_err_soft = err;
/* NOTE(review): builds and transmits the SYN-ACK for an open_request:
 * constructs the flow, optionally inverts a received routing header
 * (ipv6_invert_rthdr) to source-route the reply, routes via
 * ip6_dst_lookup(), fills the TCP checksum and sends with ip6_xmit().
 * Conditional guards around the rthdr block and the 'done' label are
 * elided in this view; kept byte-identical. */
858 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
859 struct dst_entry *dst)
861 struct ipv6_pinfo *np = inet6_sk(sk);
862 struct sk_buff * skb;
863 struct ipv6_txoptions *opt = NULL;
867 memset(&fl, 0, sizeof(fl));
868 fl.proto = IPPROTO_TCP;
869 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
870 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
871 fl.fl6_flowlabel = 0;
872 fl.oif = req->af.v6_req.iif;
873 fl.fl_ip_dport = req->rmt_port;
874 fl.fl_ip_sport = inet_sk(sk)->sport;
879 np->rxopt.bits.srcrt == 2 &&
880 req->af.v6_req.pktopts) {
881 struct sk_buff *pktopts = req->af.v6_req.pktopts;
882 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
884 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
887 if (opt && opt->srcrt) {
888 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
889 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
892 err = ip6_dst_lookup(sk, &dst, &fl);
897 skb = tcp_make_synack(sk, dst, req);
899 struct tcphdr *th = skb->h.th;
901 th->check = tcp_v6_check(th, skb->len,
902 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
903 csum_partial((char *)th, skb->len, skb->csum));
905 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
906 err = ip6_xmit(sk, skb, &fl, opt, 0);
907 if (err == NET_XMIT_CN)
913 if (opt && opt != np->opt)
914 sock_kfree_s(sk, opt, opt->tot_len);
/* NOTE(review): open_request destructor — releases the pktopts skb held
 * since the SYN arrived.  Braces elided in this view; kept byte-identical. */
918 static void tcp_v6_or_free(struct open_request *req)
920 if (req->af.v6_req.pktopts)
921 kfree_skb(req->af.v6_req.pktopts);
/* NOTE(review): open_request vtable for IPv6 — SYN-ACK retransmit, pure
 * ACK, destructor and RST callbacks.  The .family initializer and closing
 * brace are elided in this view; kept byte-identical. */
924 static struct or_calltable or_ipv6 = {
926 .rtx_syn_ack = tcp_v6_send_synack,
927 .send_ack = tcp_v6_or_send_ack,
928 .destructor = tcp_v6_or_free,
929 .send_reset = tcp_v6_send_reset
/* NOTE(review): true when the socket has requested (via IPV6_PKTOPTIONS
 * rxopt bits) any ancillary data actually present in this packet —
 * hop-by-hop opts, flow label, routing header, or destination opts.
 * Return lines elided in this view; kept byte-identical. */
932 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
934 struct ipv6_pinfo *np = inet6_sk(sk);
935 struct inet6_skb_parm *opt = IP6CB(skb);
938 if ((opt->hop && np->rxopt.bits.hopopts) ||
939 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
940 np->rxopt.bits.rxflow) ||
941 (opt->srcrt && np->rxopt.bits.srcrt) ||
942 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/* NOTE(review): outgoing checksum — with hardware csum offload
 * (CHECKSUM_HW) only the pseudo-header is folded in and skb->csum points
 * at the check field; otherwise the full software checksum is computed.
 * The skb parameter line and else-branch braces are elided in this view;
 * kept byte-identical. */
949 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
952 struct ipv6_pinfo *np = inet6_sk(sk);
954 if (skb->ip_summed == CHECKSUM_HW) {
955 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
956 skb->csum = offsetof(struct tcphdr, check);
958 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
959 csum_partial((char *)th, th->doff<<2,
/* NOTE(review): stateless RST in response to a bad segment — builds a
 * minimal TCP header with src/dst swapped, seq taken from the offending
 * segment's ack (or ack_seq computed from its seq+len when no ACK —
 * the intervening th->ack branch is elided in this view), routes and
 * transmits with a NULL socket.  Kept byte-identical. */
965 static void tcp_v6_send_reset(struct sk_buff *skb)
967 struct tcphdr *th = skb->h.th, *t1;
968 struct sk_buff *buff;
974 if (!ipv6_unicast_destination(skb))
978 * We need to grab some memory, and put together an RST,
979 * and then put it into the queue to be sent.
982 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
986 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
988 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
990 /* Swap the send and the receive. */
991 memset(t1, 0, sizeof(*t1));
992 t1->dest = th->source;
993 t1->source = th->dest;
994 t1->doff = sizeof(*t1)/4;
998 t1->seq = th->ack_seq;
1001 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1002 + skb->len - (th->doff<<2));
1005 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1007 memset(&fl, 0, sizeof(fl));
1008 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1009 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1011 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1012 sizeof(*t1), IPPROTO_TCP,
1015 fl.proto = IPPROTO_TCP;
1016 fl.oif = tcp_v6_iif(skb);
1017 fl.fl_ip_dport = t1->dest;
1018 fl.fl_ip_sport = t1->source;
1020 /* sk = NULL, but it is safe for now. RST socket required. */
1021 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1022 ip6_xmit(NULL, buff, &fl, NULL, 0);
1023 TCP_INC_STATS_BH(TcpOutSegs);
1024 TCP_INC_STATS_BH(TcpOutRsts);
/* NOTE(review): stateless ACK (used by the TIME_WAIT and open_request
 * paths below) — builds a bare TCP header, appends a timestamp option
 * when ts != 0 (the "if (ts)" guard and tot_len adjustment are elided in
 * this view), and transmits with a NULL socket like tcp_v6_send_reset().
 * Kept byte-identical. */
1031 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1033 struct tcphdr *th = skb->h.th, *t1;
1034 struct sk_buff *buff;
1036 int tot_len = sizeof(struct tcphdr);
1038 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1042 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1047 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1049 /* Swap the send and the receive. */
1050 memset(t1, 0, sizeof(*t1));
1051 t1->dest = th->source;
1052 t1->source = th->dest;
1053 t1->doff = tot_len/4;
1054 t1->seq = htonl(seq);
1055 t1->ack_seq = htonl(ack);
1057 t1->window = htons(win);
1060 u32 *ptr = (u32*)(t1 + 1);
1061 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1062 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1063 *ptr++ = htonl(tcp_time_stamp);
1067 buff->csum = csum_partial((char *)t1, tot_len, 0);
1069 memset(&fl, 0, sizeof(fl));
1070 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1071 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1073 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1074 tot_len, IPPROTO_TCP,
1077 fl.proto = IPPROTO_TCP;
1078 fl.oif = tcp_v6_iif(skb);
1079 fl.fl_ip_dport = t1->dest;
1080 fl.fl_ip_sport = t1->source;
1082 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1083 ip6_xmit(NULL, buff, &fl, NULL, 0);
1084 TCP_INC_STATS_BH(TcpOutSegs);
/* NOTE(review): ACK on behalf of a TIME_WAIT socket using the state saved
 * in the tw bucket.  Braces and trailing tcp_tw_put are elided in this
 * view; kept byte-identical. */
1091 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1093 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1095 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1096 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* NOTE(review): ACK for a pending open_request (SYN-ACK's ACK of the
 * peer's ISN).  Braces elided in this view; kept byte-identical. */
1101 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1103 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* NOTE(review): listener-side demux for a non-SYN segment: first the SYN
 * table (tcp_check_req), then the established/TIME_WAIT table; the SYN
 * cookie hook is compiled out ("#if 0").  The nsk NULL-check, bh_lock and
 * return lines are elided in this view; kept byte-identical. */
1107 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1109 struct open_request *req, **prev;
1110 struct tcphdr *th = skb->h.th;
1111 struct tcp_opt *tp = tcp_sk(sk);
1114 /* Find possible connection requests. */
1115 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1116 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1118 return tcp_check_req(sk, skb, req, prev);
1120 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1122 &skb->nh.ipv6h->daddr,
1127 if (nsk->sk_state != TCP_TIME_WAIT) {
1131 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1135 #if 0 /*def CONFIG_SYN_COOKIES*/
1136 if (!th->rst && !th->syn && th->ack)
1137 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* NOTE(review): links a new open_request into the listener's SYN table
 * bucket under syn_wait_lock and arms its retransmit expiry
 * (TCP_TIMEOUT_INIT).  The retransmit counter init and #else branch of
 * CONFIG_ACCEPT_QUEUES are elided in this view; kept byte-identical. */
1142 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1144 struct tcp_opt *tp = tcp_sk(sk);
1145 struct tcp_listen_opt *lopt = tp->listen_opt;
1146 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1149 req->expires = jiffies + TCP_TIMEOUT_INIT;
1151 req->dl_next = lopt->syn_table[h];
1153 write_lock(&tp->syn_wait_lock);
1154 lopt->syn_table[h] = req;
1155 write_unlock(&tp->syn_wait_lock);
1157 #ifdef CONFIG_ACCEPT_QUEUES
1158 tcp_synq_added(sk, req);
/* NOTE(review): incoming SYN handling — rejects non-unicast destinations
 * and a full SYN queue, allocates and initializes an open_request (parsed
 * TCP options, addresses, pktopts, inbound interface for link-locals),
 * picks an ISN, sends the SYN-ACK and queues the request.  Several guards
 * (TCP_ECN check, isn-nonzero path), the drop label and closing braces
 * are elided in this view; kept byte-identical. */
1165 /* FIXME: this is substantially similar to the ipv4 code.
1166 * Can some kind of merge be done? -- erics
1168 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1170 struct ipv6_pinfo *np = inet6_sk(sk);
1171 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1172 struct open_request *req = NULL;
1173 __u32 isn = TCP_SKB_CB(skb)->when;
1174 #ifdef CONFIG_ACCEPT_QUEUES
1178 if (skb->protocol == htons(ETH_P_IP))
1179 return tcp_v4_conn_request(sk, skb);
1181 if (!ipv6_unicast_destination(skb))
1186 * There are no SYN attacks on IPv6, yet...
1188 if (tcp_synq_is_full(sk) && !isn) {
1189 if (net_ratelimit())
1190 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1194 #ifdef CONFIG_ACCEPT_QUEUES
1195 class = (skb->nfmark <= 0) ? 0 :
1196 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1198 * Accept only if the class has shares set or if the default class
1199 * i.e. class 0 has shares
1201 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1202 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1208 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1210 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1215 req = tcp_openreq_alloc();
1219 tcp_clear_options(&tmptp);
1220 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1221 tmptp.user_mss = tp->user_mss;
1223 tcp_parse_options(skb, &tmptp, 0);
1225 tmptp.tstamp_ok = tmptp.saw_tstamp;
1226 tcp_openreq_init(req, &tmptp, skb);
1227 #ifdef CONFIG_ACCEPT_QUEUES
1228 req->acceptq_class = class;
1229 req->acceptq_time_stamp = jiffies;
1231 req->class = &or_ipv6;
1232 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1233 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1234 TCP_ECN_create_request(req, skb->h.th);
1235 req->af.v6_req.pktopts = NULL;
1236 if (ipv6_opt_accepted(sk, skb) ||
1237 np->rxopt.bits.rxinfo ||
1238 np->rxopt.bits.rxhlim) {
1239 atomic_inc(&skb->users);
1240 req->af.v6_req.pktopts = skb;
1242 req->af.v6_req.iif = sk->sk_bound_dev_if;
1244 /* So that link locals have meaning */
1245 if (!sk->sk_bound_dev_if &&
1246 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1247 req->af.v6_req.iif = tcp_v6_iif(skb);
1250 isn = tcp_v6_init_sequence(sk,skb);
1254 if (tcp_v6_send_synack(sk, req, NULL))
1257 tcp_v6_synq_add(sk, req);
1263 tcp_openreq_free(req);
1265 TCP_INC_STATS_BH(TcpAttemptFails);
1266 return 0; /* don't send reset */
/* NOTE(review): creates the child socket when the 3-way handshake
 * completes.  Two paths: (1) v4-mapped — delegate to
 * tcp_v4_syn_recv_sock() and then retrofit the child's IPv6 fields with
 * mapped addresses and the ipv6_mapped ops; (2) native IPv6 — route,
 * tcp_create_openreq_child(), copy addresses/options from the request,
 * clone SYN pktoptions, duplicate txoptions, sync MSS and hash the child.
 * Many guards, error labels (listen overflow/drop) and closing braces are
 * elided in this view; kept byte-identical. */
1269 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1270 struct open_request *req,
1271 struct dst_entry *dst)
1273 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1274 struct tcp6_sock *newtcp6sk;
1275 struct inet_opt *newinet;
1276 struct tcp_opt *newtp;
1278 struct ipv6_txoptions *opt;
1280 if (skb->protocol == htons(ETH_P_IP)) {
1285 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1290 newtcp6sk = (struct tcp6_sock *)newsk;
1291 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1293 newinet = inet_sk(newsk);
1294 newnp = inet6_sk(newsk);
1295 newtp = tcp_sk(newsk);
1297 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1299 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1302 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1305 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1307 newtp->af_specific = &ipv6_mapped;
1308 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1309 newnp->pktoptions = NULL;
1311 newnp->mcast_oif = tcp_v6_iif(skb);
1312 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1314 /* Charge newly allocated IPv6 socket. Though it is mapped,
1317 #ifdef INET_REFCNT_DEBUG
1318 atomic_inc(&inet6_sock_nr);
1321 /* It is tricky place. Until this moment IPv4 tcp
1322 worked with IPv6 af_tcp.af_specific.
1325 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1332 #ifdef CONFIG_ACCEPT_QUEUES
1333 if (sk_acceptq_is_full(sk, req->acceptq_class))
1335 if (sk_acceptq_is_full(sk))
1339 if (np->rxopt.bits.srcrt == 2 &&
1340 opt == NULL && req->af.v6_req.pktopts) {
1341 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1343 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1349 memset(&fl, 0, sizeof(fl));
1350 fl.proto = IPPROTO_TCP;
1351 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1352 if (opt && opt->srcrt) {
1353 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1354 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1356 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1357 fl.oif = sk->sk_bound_dev_if;
1358 fl.fl_ip_dport = req->rmt_port;
1359 fl.fl_ip_sport = inet_sk(sk)->sport;
1361 if (ip6_dst_lookup(sk, &dst, &fl))
1365 newsk = tcp_create_openreq_child(sk, req, skb);
1369 /* Charge newly allocated IPv6 socket */
1370 #ifdef INET_REFCNT_DEBUG
1371 atomic_inc(&inet6_sock_nr);
1374 ip6_dst_store(newsk, dst, NULL);
1375 newsk->sk_route_caps = dst->dev->features &
1376 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1378 newtcp6sk = (struct tcp6_sock *)newsk;
1379 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1381 newtp = tcp_sk(newsk);
1382 newinet = inet_sk(newsk);
1383 newnp = inet6_sk(newsk);
1385 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1387 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1388 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1389 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1390 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1392 /* Now IPv6 options...
1394 First: no IPv4 options.
1396 newinet->opt = NULL;
1399 newnp->rxopt.all = np->rxopt.all;
1401 /* Clone pktoptions received with SYN */
1402 newnp->pktoptions = NULL;
1403 if (req->af.v6_req.pktopts) {
1404 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1406 kfree_skb(req->af.v6_req.pktopts);
1407 req->af.v6_req.pktopts = NULL;
1408 if (newnp->pktoptions)
1409 skb_set_owner_r(newnp->pktoptions, newsk);
1412 newnp->mcast_oif = tcp_v6_iif(skb);
1413 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1415 /* Clone native IPv6 options from listening socket (if any)
1417 Yes, keeping reference count would be much more clever,
1418 but we make one more one thing there: reattach optmem
1422 newnp->opt = ipv6_dup_options(newsk, opt);
1424 sock_kfree_s(sk, opt, opt->tot_len);
1427 newtp->ext_header_len = 0;
1429 newtp->ext_header_len = newnp->opt->opt_nflen +
1430 newnp->opt->opt_flen;
1431 newtp->ext2_header_len = dst->header_len;
1433 tcp_sync_mss(newsk, dst_pmtu(dst));
1434 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1435 tcp_initialize_rcv_mss(newsk);
1437 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1439 __tcp_v6_hash(newsk);
1440 tcp_inherit_port(sk, newsk);
1445 NET_INC_STATS_BH(ListenOverflows);
1447 NET_INC_STATS_BH(ListenDrops);
1448 if (opt && opt != np->opt)
1449 sock_kfree_s(sk, opt, opt->tot_len);
/* NOTE(review): receive-side checksum setup — verifies a hardware-
 * computed checksum immediately; for short packets (<= 76 bytes) checks
 * in software now, otherwise defers by priming skb->csum with the
 * pseudo-header complement.  Return statements and closing braces are
 * elided in this view; kept byte-identical. */
1454 static int tcp_v6_checksum_init(struct sk_buff *skb)
1456 if (skb->ip_summed == CHECKSUM_HW) {
1457 skb->ip_summed = CHECKSUM_UNNECESSARY;
1458 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1459 &skb->nh.ipv6h->daddr,skb->csum))
1461 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1463 if (skb->len <= 76) {
1464 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1465 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1467 skb->ip_summed = CHECKSUM_UNNECESSARY;
1469 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1470 &skb->nh.ipv6h->daddr,0);
1475 /* The socket must have it's spinlock held when we get
1478 * We have a potential double-lock case here, so even when
1479 * doing backlog processing we use the BH locking scheme.
1480 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive handler for TCP/IPv6 (also used as the backlog_rcv
 * callback in tcpv6_prot).  Called with the socket spinlock held — see
 * the comment above.  Dispatches the segment according to socket state
 * and latches IPV6_PKTOPTIONS data where requested.
 */
1483 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1485 struct ipv6_pinfo *np = inet6_sk(sk);
1487 struct sk_buff *opt_skb = NULL;
1489 /* Imagine: socket is IPv6. IPv4 packet arrives,
1490 goes to IPv4 receive handler and backlogged.
1491 From backlog it always goes here. Kerboom...
1492 Fortunately, tcp_rcv_established and rcv_established
1493 handle them correctly, but it is not case with
1494 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1497 if (skb->protocol == htons(ETH_P_IP))
1498 return tcp_v4_do_rcv(sk, skb);
/* Run the attached socket filter (LSF/BPF); non-zero means drop. */
1500 if (sk_filter(sk, skb, 0))
1504 * socket locking is here for SMP purposes as backlog rcv
1505 * is currently called with bh processing disabled.
1508 /* Do Stevens' IPV6_PKTOPTIONS.
1510 Yes, guys, it is the only place in our code, where we
1511 may make it not affecting IPv4.
1512 The rest of code is protocol independent,
1513 and I do not like idea to uglify IPv4.
1515 Actually, all the idea behind IPV6_PKTOPTIONS
1516 looks not very well thought. For now we latch
1517 options, received in the last packet, enqueued
1518 by tcp. Feel free to propose better solution.
/* Clone the skb so its ancillary data can be stashed in np->pktoptions
 * after the segment has been consumed (see ipv6_pktoptions below). */
1522 opt_skb = skb_clone(skb, GFP_ATOMIC);
/* ESTABLISHED fast path: hand the segment straight to the core. */
1524 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1525 TCP_CHECK_TIMER(sk);
1526 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1528 TCP_CHECK_TIMER(sk);
1530 goto ipv6_pktoptions;
/* Slow path: validate header length and checksum before state machine. */
1534 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
/* LISTEN: a completed handshake may yield a child socket (nsk != sk). */
1537 if (sk->sk_state == TCP_LISTEN) {
1538 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1543 * Queue it on the new socket if the new socket is active,
1544 * otherwise we just shortcircuit this and continue with
1548 if (tcp_child_process(sk, nsk, skb))
1551 __kfree_skb(opt_skb);
/* All other states go through the generic TCP state machine. */
1556 TCP_CHECK_TIMER(sk);
1557 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1559 TCP_CHECK_TIMER(sk);
1561 goto ipv6_pktoptions;
/* Refused segment: answer with a RST, then fall into discard. */
1565 tcp_v6_send_reset(skb);
1568 __kfree_skb(opt_skb);
1572 TCP_INC_STATS_BH(TcpInErrs);
1577 /* Do you ask, what is it?
1579 1. skb was enqueued by tcp.
1580 2. skb is added to tail of read queue, rather than out of order.
1581 3. socket is not in passive state.
1582 4. Finally, it really contains options, which user wants to receive.
1585 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1586 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1587 if (np->rxopt.bits.rxinfo)
1588 np->mcast_oif = tcp_v6_iif(opt_skb);
1589 if (np->rxopt.bits.rxhlim)
1590 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
/* Latch the clone as the current pktoptions; the previous one (if any)
 * comes back from xchg() and is freed by the caller of this path. */
1591 if (ipv6_opt_accepted(sk, opt_skb)) {
1592 skb_set_owner_r(opt_skb, sk);
1593 opt_skb = xchg(&np->pktoptions, opt_skb);
1595 __kfree_skb(opt_skb);
1596 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Entry point for inbound IPPROTO_TCP segments over IPv6 (registered as
 * tcpv6_protocol.handler).  Validates the TCP header and checksum, fills
 * the skb control block, looks up the owning socket and delivers the
 * segment — directly, via the prequeue, or via the owner's backlog.
 * Unclaimed segments get a RST; TIME_WAIT sockets are handled at the
 * bottom.  NOTE(review): labels and some error paths are elided in this
 * view.
 */
1605 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1607 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are processed. */
1612 if (skb->pkt_type != PACKET_HOST)
1616 * Count it even if it's bad.
1618 TCP_INC_STATS_BH(TcpInSegs);
/* Make sure at least the fixed TCP header, then the full header
 * (doff words), is linear in the skb. */
1620 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1625 if (th->doff < sizeof(struct tcphdr)/4)
1627 if (!pskb_may_pull(skb, th->doff*4))
1630 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1631 tcp_v6_checksum_init(skb) < 0))
/* Populate the TCP control block: sequence space (SYN/FIN each consume
 * one sequence number), ack, timestamp slot, ECN bits, SACK state. */
1635 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1636 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1637 skb->len - th->doff*4);
1638 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1639 TCP_SKB_CB(skb)->when = 0;
1640 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1641 TCP_SKB_CB(skb)->sacked = 0;
1642 
1643 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1644 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
/* TIME_WAIT buckets masquerade as sockets; special-cased below. */
1650 if (sk->sk_state == TCP_TIME_WAIT)
1653 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1654 goto discard_and_relse;
1656 if (sk_filter(sk, skb, 0))
1657 goto discard_and_relse;
/* Deliver under the socket lock: process now, try the prequeue, or —
 * if a user holds the socket — defer to the backlog. */
1663 if (!sock_owned_by_user(sk)) {
1664 if (!tcp_prequeue(sk, skb))
1665 ret = tcp_v6_do_rcv(sk, skb);
1667 sk_add_backlog(sk, skb);
1671 return ret ? -1 : 0;
/* No socket found: verify policy/checksum, then answer with a RST. */
1674 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1677 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1679 TCP_INC_STATS_BH(TcpInErrs);
1681 tcp_v6_send_reset(skb);
/* TIME_WAIT handling (label elided above): recheck policy and checksum,
 * then let the timewait state machine decide. */
1698 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1699 tcp_tw_put((struct tcp_tw_bucket *) sk);
1703 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1704 TCP_INC_STATS_BH(TcpInErrs);
1705 tcp_tw_put((struct tcp_tw_bucket *) sk);
1709 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1710 skb, th, skb->len)) {
/* A new SYN hit a TIME_WAIT port: if a listener exists, recycle the
 * bucket so the connection can be re-established. */
1715 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1717 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1718 tcp_tw_put((struct tcp_tw_bucket *)sk);
1722 /* Fall through to ACK */
1725 tcp_v6_timewait_ack(sk, skb);
1729 case TCP_TW_SUCCESS:;
/*
 * Re-establish a route for the socket after the cached dst entry has
 * been invalidated (registered as ipv6_specific.rebuild_header).
 * Returns 0 on success; on lookup failure sk_err_soft is set (error
 * return lines elided in this view).
 */
1734 static int tcp_v6_rebuild_header(struct sock *sk)
1737 struct dst_entry *dst;
1738 struct ipv6_pinfo *np = inet6_sk(sk);
/* Fast path: the cached route may still be valid for our cookie. */
1740 dst = __sk_dst_check(sk, np->dst_cookie);
1743 struct inet_opt *inet = inet_sk(sk);
/* Build the flow descriptor from the connection 4-tuple + flowlabel. */
1746 memset(&fl, 0, sizeof(fl));
1747 fl.proto = IPPROTO_TCP;
1748 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1749 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1750 fl.fl6_flowlabel = np->flow_label;
1751 fl.oif = sk->sk_bound_dev_if;
1752 fl.fl_ip_dport = inet->dport;
1753 fl.fl_ip_sport = inet->sport;
/* With a type-0 routing header, route towards the first intermediate
 * hop rather than the final destination. */
1755 if (np->opt && np->opt->srcrt) {
1756 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1757 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1760 err = ip6_dst_lookup(sk, &dst, &fl);
/* Lookup failed: disable route-dependent offload capabilities. */
1763 sk->sk_route_caps = 0;
/* Cache the fresh route and refresh offload caps / header length. */
1767 ip6_dst_store(sk, dst, NULL);
1768 sk->sk_route_caps = dst->dev->features &
1769 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1770 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * Transmit one TCP segment on an IPv6 socket (registered as
 * ipv6_specific.queue_xmit).  Builds the flow from the connection
 * state, (re)validates or looks up the route, then hands the skb to
 * ip6_xmit().  Mirrors tcp_v6_rebuild_header()'s flow construction.
 */
1776 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1778 struct sock *sk = skb->sk;
1779 struct inet_opt *inet = inet_sk(sk);
1780 struct ipv6_pinfo *np = inet6_sk(sk);
1782 struct dst_entry *dst;
1784 memset(&fl, 0, sizeof(fl));
1785 fl.proto = IPPROTO_TCP;
1786 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1787 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1788 fl.fl6_flowlabel = np->flow_label;
1789 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1790 fl.oif = sk->sk_bound_dev_if;
1791 fl.fl_ip_sport = inet->sport;
1792 fl.fl_ip_dport = inet->dport;
/* Route towards the first hop of a source routing header, if present. */
1794 if (np->opt && np->opt->srcrt) {
1795 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1796 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Reuse the cached route when still valid; otherwise look one up and
 * cache it (soft error recorded on failure — return line elided). */
1799 dst = __sk_dst_check(sk, np->dst_cookie);
1802 int err = ip6_dst_lookup(sk, &dst, &fl);
1805 sk->sk_err_soft = -err;
1809 ip6_dst_store(sk, dst, NULL);
1810 sk->sk_route_caps = dst->dev->features &
1811 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1812 tcp_sk(sk)->ext2_header_len = dst->header_len;
1815 skb->dst = dst_clone(dst);
1817 /* Restore final destination back after routing done */
1818 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1820 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill *uaddr with the peer's IPv6 address/port (registered as the
 * .addr2sockaddr hook in both ipv6_specific and ipv6_mapped).
 */
1823 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1825 struct ipv6_pinfo *np = inet6_sk(sk);
1826 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1828 sin6->sin6_family = AF_INET6;
1829 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1830 sin6->sin6_port = inet_sk(sk)->dport;
1831 /* We do not store received flowlabel for TCP */
1832 sin6->sin6_flowinfo = 0;
1833 sin6->sin6_scope_id = 0;
/* Link-local peers need a scope id; use the bound interface if any. */
1834 if (sk->sk_bound_dev_if &&
1835 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1836 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * Peer timestamp caching (for TIME_WAIT recycling) is not implemented
 * for IPv6 — this .remember_stamp hook is a stub.
 */
1839 static int tcp_v6_remember_stamp(struct sock *sk)
1841 /* Alas, not yet... */
/* AF-specific operations for a native IPv6 TCP socket. */
1845 static struct tcp_func ipv6_specific = {
1846 .queue_xmit = tcp_v6_xmit,
1847 .send_check = tcp_v6_send_check,
1848 .rebuild_header = tcp_v6_rebuild_header,
1849 .conn_request = tcp_v6_conn_request,
1850 .syn_recv_sock = tcp_v6_syn_recv_sock,
1851 .remember_stamp = tcp_v6_remember_stamp,
1852 .net_header_len = sizeof(struct ipv6hdr),
1854 .setsockopt = ipv6_setsockopt,
1855 .getsockopt = ipv6_getsockopt,
1856 .addr2sockaddr = v6_addr2sockaddr,
1857 .sockaddr_len = sizeof(struct sockaddr_in6)
1861 * TCP over IPv4 via INET6 API
/* Operations for TCP over IPv4 on an IPv6 API socket (v4-mapped
 * addresses): IPv4 transmit/checksum path, IPv6 sockopt/addressing. */
1864 static struct tcp_func ipv6_mapped = {
1865 .queue_xmit = ip_queue_xmit,
1866 .send_check = tcp_v4_send_check,
1867 .rebuild_header = tcp_v4_rebuild_header,
1868 .conn_request = tcp_v6_conn_request,
1869 .syn_recv_sock = tcp_v6_syn_recv_sock,
1870 .remember_stamp = tcp_v4_remember_stamp,
1871 .net_header_len = sizeof(struct iphdr),
1873 .setsockopt = ipv6_setsockopt,
1874 .getsockopt = ipv6_getsockopt,
1875 .addr2sockaddr = v6_addr2sockaddr,
1876 .sockaddr_len = sizeof(struct sockaddr_in6)
1881 /* NOTE: A lot of things set to zero explicitly by call to
1882 * sk_alloc() so need not be done here.
/*
 * Initialise a freshly allocated IPv6 TCP socket (tcpv6_prot.init):
 * queues, timers, congestion-control defaults, and the af_specific
 * operations vector.
 */
1884 static int tcp_v6_init_sock(struct sock *sk)
1886 struct tcp_opt *tp = tcp_sk(sk);
1888 skb_queue_head_init(&tp->out_of_order_queue);
1889 tcp_init_xmit_timers(sk);
1890 tcp_prequeue_init(tp);
1892 tp->rto = TCP_TIMEOUT_INIT;
1893 tp->mdev = TCP_TIMEOUT_INIT;
1895 /* So many TCP implementations out there (incorrectly) count the
1896 * initial SYN frame in their delayed-ACK and congestion control
1897 * algorithms that we must have the following bandaid to talk
1898 * efficiently to them. -DaveM
1902 /* See draft-stevens-tcpca-spec-01 for discussion of the
1903 * initialization of these values.
1905 tp->snd_ssthresh = 0x7fffffff;
1906 tp->snd_cwnd_clamp = ~0;
/* 536 = RFC 1122 default effective MSS until the path is learned. */
1907 tp->mss_cache = 536;
1909 tp->reordering = sysctl_tcp_reordering;
1911 sk->sk_state = TCP_CLOSE;
/* Native IPv6 ops by default; switched to ipv6_mapped for v4-mapped
 * destinations elsewhere in this file. */
1913 tp->af_specific = &ipv6_specific;
1915 sk->sk_write_space = sk_stream_write_space;
1916 sk->sk_use_write_queue = 1;
1918 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1919 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1921 atomic_inc(&tcp_sockets_allocated);
/*
 * Destructor for an IPv6 TCP socket (tcpv6_prot.destroy): run the
 * shared IPv4/TCP teardown first, then release IPv6-specific state.
 */
1926 static int tcp_v6_destroy_sock(struct sock *sk)
1928 extern int tcp_v4_destroy_sock(struct sock *sk);
1930 tcp_v4_destroy_sock(sk);
1931 return inet6_destroy_sock(sk);
1934 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Emit one /proc/net/tcp6 line for an embryonic (SYN_RECV)
 * open_request.  Field layout matches the header printed by
 * tcp6_seq_show(); several fields are fixed because an open_request
 * has no full socket behind it yet.
 */
1935 static void get_openreq6(struct seq_file *seq,
1936 struct sock *sk, struct open_request *req, int i, int uid)
1938 struct in6_addr *dest, *src;
/* Time until the SYN-ACK retransmit/expiry timer fires. */
1939 int ttd = req->expires - jiffies;
1944 src = &req->af.v6_req.loc_addr;
1945 dest = &req->af.v6_req.rmt_addr;
1947 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1948 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1950 src->s6_addr32[0], src->s6_addr32[1],
1951 src->s6_addr32[2], src->s6_addr32[3],
1952 ntohs(inet_sk(sk)->sport),
1953 dest->s6_addr32[0], dest->s6_addr32[1],
1954 dest->s6_addr32[2], dest->s6_addr32[3],
1955 ntohs(req->rmt_port),
1957 0,0, /* could print option size, but that is af dependent. */
1958 1, /* timers active (only the expire timer) */
1959 jiffies_to_clock_t(ttd),
1962 0, /* non standard timer */
1963 0, /* open_requests have no inode */
/*
 * Emit one /proc/net/tcp6 line for a full (listening or established)
 * TCP socket, including queue depths, pending-timer state and
 * congestion-control details.
 */
1967 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1969 struct in6_addr *dest, *src;
1972 unsigned long timer_expires;
1973 struct inet_opt *inet = inet_sk(sp);
1974 struct tcp_opt *tp = tcp_sk(sp);
1975 struct ipv6_pinfo *np = inet6_sk(sp);
1978 src = &np->rcv_saddr;
1979 destp = ntohs(inet->dport);
1980 srcp = ntohs(inet->sport);
/* Pick whichever timer is pending: retransmit, zero-window probe,
 * keepalive (sk_timer), or none (expires "now"). */
1981 if (tp->pending == TCP_TIME_RETRANS) {
1983 timer_expires = tp->timeout;
1984 } else if (tp->pending == TCP_TIME_PROBE0) {
1986 timer_expires = tp->timeout;
1987 } else if (timer_pending(&sp->sk_timer)) {
1989 timer_expires = sp->sk_timer.expires;
1992 timer_expires = jiffies;
1996 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1997 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1999 src->s6_addr32[0], src->s6_addr32[1],
2000 src->s6_addr32[2], src->s6_addr32[3], srcp,
2001 dest->s6_addr32[0], dest->s6_addr32[1],
2002 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2004 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2006 jiffies_to_clock_t(timer_expires - jiffies),
2011 atomic_read(&sp->sk_refcnt), sp,
2012 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
/* ssthresh >= 0xFFFF is shown as -1, i.e. "effectively unlimited". */
2013 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Emit one /proc/net/tcp6 line for a TIME_WAIT mini-socket
 * (tcp_tw_bucket).  Queue/uid/inode fields are zero since no full
 * socket remains; timer field 3 marks the TIME_WAIT timer.
 */
2017 static void get_timewait6_sock(struct seq_file *seq,
2018 struct tcp_tw_bucket *tw, int i)
2020 struct in6_addr *dest, *src;
/* Remaining TIME_WAIT lifetime in jiffies. */
2022 int ttd = tw->tw_ttd - jiffies;
2027 dest = &tw->tw_v6_daddr;
2028 src = &tw->tw_v6_rcv_saddr;
2029 destp = ntohs(tw->tw_dport);
2030 srcp = ntohs(tw->tw_sport);
2033 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2034 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2036 src->s6_addr32[0], src->s6_addr32[1],
2037 src->s6_addr32[2], src->s6_addr32[3], srcp,
2038 dest->s6_addr32[0], dest->s6_addr32[1],
2039 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2040 tw->tw_substate, 0, 0,
2041 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2042 atomic_read(&tw->tw_refcnt), tw);
2045 #ifdef CONFIG_PROC_FS
/*
 * seq_file ->show callback for /proc/net/tcp6: prints the header row
 * for the start token, otherwise dispatches on the iterator state to
 * the appropriate per-entry formatter.
 */
2046 static int tcp6_seq_show(struct seq_file *seq, void *v)
2048 struct tcp_iter_state *st;
2050 if (v == SEQ_START_TOKEN) {
2055 "st tx_queue rx_queue tr tm->when retrnsmt"
2056 " uid timeout inode\n");
2061 switch (st->state) {
2062 case TCP_SEQ_STATE_LISTENING:
2063 case TCP_SEQ_STATE_ESTABLISHED:
2064 get_tcp6_sock(seq, v, st->num);
2066 case TCP_SEQ_STATE_OPENREQ:
2067 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2069 case TCP_SEQ_STATE_TIME_WAIT:
2070 get_timewait6_sock(seq, v, st->num);
/* Registration data for the /proc/net/tcp6 seq_file (the fops struct
 * is filled in by tcp_proc_register()). */
2077 static struct file_operations tcp6_seq_fops;
2078 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2079 .owner = THIS_MODULE,
2082 .seq_show = tcp6_seq_show,
2083 .seq_fops = &tcp6_seq_fops,
/* Create the /proc/net/tcp6 entry; returns tcp_proc_register()'s result. */
2086 int __init tcp6_proc_init(void)
2088 return tcp_proc_register(&tcp6_seq_afinfo);
/* Remove the /proc/net/tcp6 entry. */
2091 void tcp6_proc_exit(void)
2093 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * The struct proto for SOCK_STREAM/IPPROTO_TCP over AF_INET6.  Mostly
 * shares the generic TCP entry points; IPv6-specific hooks are init,
 * destroy, backlog_rcv, hash and get_port.
 */
2097 struct proto tcpv6_prot = {
2100 .connect = tcp_v6_connect,
2101 .disconnect = tcp_disconnect,
2102 .accept = tcp_accept,
2104 .init = tcp_v6_init_sock,
2105 .destroy = tcp_v6_destroy_sock,
2106 .shutdown = tcp_shutdown,
2107 .setsockopt = tcp_setsockopt,
2108 .getsockopt = tcp_getsockopt,
2109 .sendmsg = tcp_sendmsg,
2110 .recvmsg = tcp_recvmsg,
2111 .backlog_rcv = tcp_v6_do_rcv,
2112 .hash = tcp_v6_hash,
2113 .unhash = tcp_unhash,
2114 .get_port = tcp_v6_get_port,
2115 .enter_memory_pressure = tcp_enter_memory_pressure,
2116 .sockets_allocated = &tcp_sockets_allocated,
2117 .memory_allocated = &tcp_memory_allocated,
2118 .memory_pressure = &tcp_memory_pressure,
2119 .sysctl_mem = sysctl_tcp_mem,
2120 .sysctl_wmem = sysctl_tcp_wmem,
2121 .sysctl_rmem = sysctl_tcp_rmem,
2122 .max_header = MAX_TCP_HEADER,
/* inet6-layer registration: routes IPPROTO_TCP packets to tcp_v6_rcv;
 * NOPOLICY/FINAL: xfrm policy is checked per-socket inside tcp_v6_rcv. */
2125 static struct inet6_protocol tcpv6_protocol = {
2126 .handler = tcp_v6_rcv,
2127 .err_handler = tcp_v6_err,
2128 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2131 extern struct proto_ops inet6_stream_ops;
/* Socket-switch entry tying SOCK_STREAM/IPPROTO_TCP to tcpv6_prot and
 * the generic inet6 stream ops; PERMANENT = cannot be unregistered. */
2133 static struct inet_protosw tcpv6_protosw = {
2134 .type = SOCK_STREAM,
2135 .protocol = IPPROTO_TCP,
2136 .prot = &tcpv6_prot,
2137 .ops = &inet6_stream_ops,
2140 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time initialisation: register the TCP handler with the inet6
 * protocol table, then register the socket switch entry.
 * NOTE(review): the protosw is registered even when inet6_add_protocol()
 * fails (only a printk) — confirm this is intentional.
 */
2143 void __init tcpv6_init(void)
2145 /* register inet6 protocol */
2146 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2147 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2148 inet6_register_protosw(&tcpv6_protosw);