vserver 2.0 rc7
[linux-2.6.git] / net / ipv6 / tcp_ipv6.c
index 5167758..0f69e80 100644 (file)
@@ -139,9 +139,12 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
                int rover;
 
                spin_lock(&tcp_portalloc_lock);
-               rover = tcp_port_rover;
+               if (tcp_port_rover < low)
+                       rover = low;
+               else
+                       rover = tcp_port_rover;
                do {    rover++;
-                       if ((rover < low) || (rover > high))
+                       if (rover > high)
                                rover = low;
                        head = &tcp_bhash[tcp_bhashfn(rover)];
                        spin_lock(&head->lock);
@@ -441,21 +444,22 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
        }
 }
 
-static int tcp_v6_check_established(struct sock *sk)
+static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
+                                     struct tcp_tw_bucket **twp)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *daddr = &np->rcv_saddr;
        struct in6_addr *saddr = &np->daddr;
        int dif = sk->sk_bound_dev_if;
-       u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
+       u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
-       int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
+       int hash = tcp_v6_hashfn(daddr, lport, saddr, inet->dport);
        struct tcp_ehash_bucket *head = &tcp_ehash[hash];
        struct sock *sk2;
        struct hlist_node *node;
        struct tcp_tw_bucket *tw;
 
-       write_lock_bh(&head->lock);
+       write_lock(&head->lock);
 
        /* Check TIME-WAIT sockets first. */
        sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
@@ -468,7 +472,10 @@ static int tcp_v6_check_established(struct sock *sk)
                   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        struct tcp_sock *tp = tcp_sk(sk);
 
-                       if (tw->tw_ts_recent_stamp) {
+                       if (tw->tw_ts_recent_stamp &&
+                           (!twp || (sysctl_tcp_tw_reuse &&
+                                     xtime.tv_sec - 
+                                     tw->tw_ts_recent_stamp > 1))) {
                                /* See comment in tcp_ipv4.c */
                                tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
                                if (!tp->write_seq)
@@ -494,40 +501,113 @@ unique:
        __sk_add_node(sk, &head->chain);
        sk->sk_hashent = hash;
        sock_prot_inc_use(sk->sk_prot);
-       write_unlock_bh(&head->lock);
+       write_unlock(&head->lock);
 
-       if (tw) {
+       if (twp) {
+               *twp = tw;
+               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+       } else if (tw) {
                /* Silly. Should hash-dance instead... */
-               local_bh_disable();
                tcp_tw_deschedule(tw);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-               local_bh_enable();
 
                tcp_tw_put(tw);
        }
        return 0;
 
 not_unique:
-       write_unlock_bh(&head->lock);
+       write_unlock(&head->lock);
        return -EADDRNOTAVAIL;
 }
 
-static int tcp_v6_hash_connect(struct sock *sk)
+static inline u32 tcpv6_port_offset(const struct sock *sk)
 {
-       struct tcp_bind_hashbucket *head;
-       struct tcp_bind_bucket *tb;
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct ipv6_pinfo *np = inet6_sk(sk);
 
-       /* XXX */
-       if (inet_sk(sk)->num == 0) { 
-               int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
-               if (err)
-                       return err;
-               inet_sk(sk)->sport = htons(inet_sk(sk)->num);
-       }
+       return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+                                          np->daddr.s6_addr32,
+                                          inet->dport);
+}
 
-       head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
-       tb = tb_head(head);
+static int tcp_v6_hash_connect(struct sock *sk)
+{
+       unsigned short snum = inet_sk(sk)->num;
+       struct tcp_bind_hashbucket *head;
+       struct tcp_bind_bucket *tb;
+       int ret;
 
+       if (!snum) {
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int range = high - low;
+               int i;
+               int port;
+               static u32 hint;
+               u32 offset = hint + tcpv6_port_offset(sk);
+               struct hlist_node *node;
+               struct tcp_tw_bucket *tw = NULL;
+
+               local_bh_disable();
+               for (i = 1; i <= range; i++) {
+                       port = low + (i + offset) % range;
+                       head = &tcp_bhash[tcp_bhashfn(port)];
+                       spin_lock(&head->lock);
+
+                       /* Does not bother with rcv_saddr checks,
+                        * because the established check is already
+                        * unique enough.
+                        */
+                       tb_for_each(tb, node, &head->chain) {
+                               if (tb->port == port) {
+                                       BUG_TRAP(!hlist_empty(&tb->owners));
+                                       if (tb->fastreuse >= 0)
+                                               goto next_port;
+                                       if (!__tcp_v6_check_established(sk,
+                                                                       port,
+                                                                       &tw))
+                                               goto ok;
+                                       goto next_port;
+                               }
+                       }
+
+                       tb = tcp_bucket_create(head, port);
+                       if (!tb) {
+                               spin_unlock(&head->lock);
+                               break;
+                       }
+                       tb->fastreuse = -1;
+                       goto ok;
+
+               next_port:
+                       spin_unlock(&head->lock);
+               }
+               local_bh_enable();
+
+               return -EADDRNOTAVAIL;
+
+ok:
+               hint += i;
+
+               /* Head lock still held and bh's disabled */
+               tcp_bind_hash(sk, tb, port);
+               if (sk_unhashed(sk)) {
+                       inet_sk(sk)->sport = htons(port);
+                       __tcp_v6_hash(sk);
+               }
+               spin_unlock(&head->lock);
+
+               if (tw) {
+                       tcp_tw_deschedule(tw);
+                       tcp_tw_put(tw);
+               }
+
+               ret = 0;
+               goto out;
+       }
+
+       head  = &tcp_bhash[tcp_bhashfn(snum)];
+       tb  = tcp_sk(sk)->bind_hash;
        spin_lock_bh(&head->lock);
 
        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -535,8 +615,12 @@ static int tcp_v6_hash_connect(struct sock *sk)
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
-               spin_unlock_bh(&head->lock);
-               return tcp_v6_check_established(sk);
+               spin_unlock(&head->lock);
+               /* No definite answer... Walk to established hash table */
+               ret = __tcp_v6_check_established(sk, snum, NULL);
+out:
+               local_bh_enable();
+               return ret;
        }
 }
 
@@ -701,7 +785,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
-       tp->ext2_header_len = dst->header_len;
 
        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
@@ -813,8 +896,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                } else
                        dst_hold(dst);
 
-               if (tp->pmtu_cookie > dst_pmtu(dst)) {
-                       tcp_sync_mss(sk, dst_pmtu(dst));
+               if (tp->pmtu_cookie > dst_mtu(dst)) {
+                       tcp_sync_mss(sk, dst_mtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
@@ -1443,9 +1526,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        if (newnp->opt)
                newtp->ext_header_len = newnp->opt->opt_nflen +
                                        newnp->opt->opt_flen;
-       newtp->ext2_header_len = dst->header_len;
 
-       tcp_sync_mss(newsk, dst_pmtu(dst));
+       tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);
 
@@ -1792,7 +1874,6 @@ static int tcp_v6_rebuild_header(struct sock *sk)
                ip6_dst_store(sk, dst, NULL);
                sk->sk_route_caps = dst->dev->features &
                        ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-               tcp_sk(sk)->ext2_header_len = dst->header_len;
        }
 
        return 0;
@@ -1846,7 +1927,6 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
                ip6_dst_store(sk, dst, NULL);
                sk->sk_route_caps = dst->dev->features &
                        ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-               tcp_sk(sk)->ext2_header_len = dst->header_len;
        }
 
        skb->dst = dst_clone(dst);
@@ -1950,7 +2030,7 @@ static int tcp_v6_init_sock(struct sock *sk)
        tp->af_specific = &ipv6_specific;
 
        sk->sk_write_space = sk_stream_write_space;
-       sk->sk_use_write_queue = 1;
+       sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
        sk->sk_sndbuf = sysctl_tcp_wmem[1];
        sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -2158,7 +2238,7 @@ struct proto tcpv6_prot = {
        .sysctl_wmem            = sysctl_tcp_wmem,
        .sysctl_rmem            = sysctl_tcp_rmem,
        .max_header             = MAX_TCP_HEADER,
-       .slab_obj_size          = sizeof(struct tcp6_sock),
+       .obj_size               = sizeof(struct tcp6_sock),
 };
 
 static struct inet6_protocol tcpv6_protocol = {