VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bc5fba4..68ab99f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
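
This portion of the patch tracks the upstream reorganization of the TCP output path that went into the 2.6.9 development series: the TCP-private send head, the skb accounting helpers, and the retransmit-queue walker migrate into the generic stream-socket layer (the sk_* and sk_stream_* helpers), the SNMP statistics switch to enum-indexed MIB counters, and window selection learns to cooperate with RFC 1323 window scaling.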
@@ -48,9 +48,9 @@ int sysctl_tcp_retrans_collapse = 1;
 static __inline__
 void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 {
-       tp->send_head = skb->next;
-       if (tp->send_head == (struct sk_buff *)&sk->sk_write_queue)
-               tp->send_head = NULL;
+       sk->sk_send_head = skb->next;
+       if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
+               sk->sk_send_head = NULL;
        tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
        if (tp->packets_out++ == 0)
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
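
The hunk above is part of a tree-wide migration: the pointer to the first unsent segment moves from the TCP-private tp->send_head into the socket itself as sk->sk_send_head, so protocol-neutral stream helpers can reach it. The write queue is a doubly linked list whose sk_buff_head, cast to a struct sk_buff pointer, doubles as the end-of-list sentinel; that is what the compare against &sk->sk_write_queue tests. A minimal sketch of the sentinel idiom, assuming the struct sock of this series (the helper name is hypothetical):

    /* Advance past skb; the sk_buff_head itself, cast to an skb
     * pointer, marks the end of the circular list. */
    static inline struct sk_buff *next_unsent_sketch(struct sock *sk,
                                                     struct sk_buff *skb)
    {
            skb = skb->next;
            if (skb == (struct sk_buff *)&sk->sk_write_queue)
                    return NULL;            /* nothing left on the queue */
            return skb;
    }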
@@ -168,6 +168,14 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
        tp->rcv_wnd = new_win;
        tp->rcv_wup = tp->rcv_nxt;
 
+       /* Make sure we do not exceed the maximum possible
+        * scaled window.
+        */
+       if (!tp->rcv_wscale)
+               new_win = min(new_win, MAX_TCP_WINDOW);
+       else
+               new_win = min(new_win, (65535U << tp->rcv_wscale));
+
        /* RFC1323 scaling applied */
        new_win >>= tp->rcv_wscale;
 
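
The added clamp keeps the advertised window representable after scaling. Without window scaling the 16-bit header field is capped at MAX_TCP_WINDOW (32767 in this tree); with scaling in effect the ceiling is 65535 << rcv_wscale, the largest value the field can carry once the receiver shifts it back up. A worked user-space illustration of the arithmetic:

    #include <stdio.h>

    #define MAX_TCP_WINDOW 32767U           /* cap when no scaling is in use */

    /* Mirror of the clamp added above, returning the on-wire value. */
    static unsigned int header_window(unsigned int new_win, int rcv_wscale)
    {
            if (!rcv_wscale) {
                    if (new_win > MAX_TCP_WINDOW)
                            new_win = MAX_TCP_WINDOW;
            } else if (new_win > (65535U << rcv_wscale)) {
                    new_win = 65535U << rcv_wscale;
            }
            return new_win >> rcv_wscale;
    }

    int main(void)
    {
            /* A 1 MiB window with wscale 2 clamps to 65535 << 2, so the
             * header field carries 65535 rather than a wrapped value. */
            printf("%u\n", header_window(1U << 20, 2));     /* prints 65535 */
            return 0;
    }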
@@ -291,7 +299,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                if (skb->len != tcp_header_size)
                        tcp_event_data_sent(tp, skb, sk);
 
-               TCP_INC_STATS(TcpOutSegs);
+               TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
                err = tp->af_specific->queue_xmit(skb, 0);
                if (err <= 0)
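
The statistics change is a rename, not a behavioural one: identifier-style counter names (TcpOutSegs, TcpRetransSegs, TCPFastRetrans, ...) become enum constants (TCP_MIB_*, LINUX_MIB_*) that index per-CPU MIB tables, and the same substitution recurs in the retransmit, reset, and connect paths below. A rough single-CPU sketch of the enum-indexed counter pattern (all names hypothetical):

    /* Counters live in an array indexed by an enum, so adding a new
     * statistic is one enum entry rather than a new global. */
    enum {
            SKETCH_MIB_OUTSEGS,
            SKETCH_MIB_RETRANSSEGS,
            __SKETCH_MIB_MAX
    };

    static unsigned long sketch_mib[__SKETCH_MIB_MAX];

    #define SKETCH_INC_STATS(field)  (sketch_mib[field]++)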
@@ -326,11 +334,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
        /* Advance write_seq and place onto the write_queue. */
        tp->write_seq = TCP_SKB_CB(skb)->end_seq;
        __skb_queue_tail(&sk->sk_write_queue, skb);
-       tcp_charge_skb(sk, skb);
+       sk_charge_skb(sk, skb);
 
        /* Queue it, remembering where we must start sending. */
-       if (tp->send_head == NULL)
-               tp->send_head = skb;
+       if (sk->sk_send_head == NULL)
+               sk->sk_send_head = skb;
 }
 
 /* Send _single_ skb sitting at the send head. This function requires
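
tcp_charge_skb() likewise becomes the protocol-neutral sk_charge_skb(). The helper is pure send-buffer accounting; a sketch of its effect, assuming the struct sock fields of this series:

    /* Charge a newly queued skb against the socket: its truesize
     * joins sk_wmem_queued and is taken out of the forward-allocation
     * quota. */
    static inline void charge_skb_sketch(struct sock *sk,
                                         struct sk_buff *skb)
    {
            sk->sk_wmem_queued += skb->truesize;
            sk->sk_forward_alloc -= skb->truesize;
    }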
@@ -339,13 +347,13 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 void tcp_push_one(struct sock *sk, unsigned cur_mss)
 {
        struct tcp_opt *tp = tcp_sk(sk);
-       struct sk_buff *skb = tp->send_head;
+       struct sk_buff *skb = sk->sk_send_head;
 
        if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
                /* Send it out now. */
                TCP_SKB_CB(skb)->when = tcp_time_stamp;
                if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
-                       tp->send_head = NULL;
+                       sk->sk_send_head = NULL;
                        tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
                        if (tp->packets_out++ == 0)
                                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
@@ -354,70 +362,6 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
        }
 }
 
-/* Split fragmented skb to two parts at length len. */
-
-static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len)
-{
-       int i;
-       int pos = skb_headlen(skb);
-
-       if (len < pos) {
-               /* Split line is inside header. */
-               memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len);
-
-               /* And move data appendix as is. */
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-                       skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
-
-               skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
-               skb_shinfo(skb)->nr_frags = 0;
-
-               skb1->data_len = skb->data_len;
-               skb1->len += skb1->data_len;
-               skb->data_len = 0;
-               skb->len = len;
-               skb->tail = skb->data+len;
-       } else {
-               int k = 0;
-               int nfrags = skb_shinfo(skb)->nr_frags;
-
-               /* Second chunk has no header, nothing to copy. */
-
-               skb_shinfo(skb)->nr_frags = 0;
-               skb1->len = skb1->data_len = skb->len - len;
-               skb->len = len;
-               skb->data_len = len - pos;
-
-               for (i=0; i<nfrags; i++) {
-                       int size = skb_shinfo(skb)->frags[i].size;
-                       if (pos + size > len) {
-                               skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
-
-                               if (pos < len) {
-                                       /* Split frag.
-                                        * We have to variants in this case:
-                                        * 1. Move all the frag to the second
-                                        *    part, if it is possible. F.e.
-                                        *    this approach is mandatory for TUX,
-                                        *    where splitting is expensive.
-                                        * 2. Split is accurately. We make this.
-                                        */
-                                       get_page(skb_shinfo(skb)->frags[i].page);
-                                       skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
-                                       skb_shinfo(skb1)->frags[0].size -= (len-pos);
-                                       skb_shinfo(skb)->frags[i].size = len-pos;
-                                       skb_shinfo(skb)->nr_frags++;
-                               }
-                               k++;
-                       } else {
-                               skb_shinfo(skb)->nr_frags++;
-                       }
-                       pos += size;
-               }
-               skb_shinfo(skb1)->nr_frags = k;
-       }
-}
-
 /* Function to create two new TCP segments.  Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list.  This won't be called frequently, I hope. 
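
The removal of skb_split() is code motion rather than a functional change: in this kernel series the function moves to net/core/skbuff.c as a generic helper so it can be shared beyond TCP, with the same contract as the copy deleted above. Its caller in this file is tcp_fragment(), just below:

    /* Usage as in tcp_fragment(): skb keeps bytes [0, len), and the
     * remainder, including page fragments straddling the boundary,
     * moves to buff. */
    skb_split(skb, buff, len);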
@@ -436,10 +380,10 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
                return -ENOMEM;
 
        /* Get a new skb... force flag on. */
-       buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
+       buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
        if (buff == NULL)
                return -ENOMEM; /* We'll just try again later. */
-       tcp_charge_skb(sk, buff);
+       sk_charge_skb(sk, buff);
 
        /* Correct the sequence numbers. */
        TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
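
tcp_fragment() now draws its second segment from the generic stream allocator: tcp_alloc_skb() becomes sk_stream_alloc_skb(). A simplified sketch of what such an allocator does, reserving protocol header room up front (this follows the real helper only loosely):

    /* Allocate an skb for queued stream data with header room already
     * reserved; NULL just means the caller retries later (-ENOMEM). */
    static inline struct sk_buff *stream_alloc_skb_sketch(struct sock *sk,
                                                          int size, int gfp)
    {
            struct sk_buff *skb = alloc_skb(size + MAX_TCP_HEADER, gfp);

            if (skb != NULL)
                    skb_reserve(skb, MAX_TCP_HEADER);
            return skb;
    }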
@@ -636,8 +580,10 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
                 */
                mss_now = tcp_current_mss(sk, 1);
 
-               while((skb = tp->send_head) &&
-                     tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)) {
+               while ((skb = sk->sk_send_head) &&
+                      tcp_snd_test(tp, skb, mss_now,
+                                   tcp_skb_is_last(sk, skb) ? nonagle :
+                                                              TCP_NAGLE_PUSH)) {
                        if (skb->len > mss_now) {
                                if (tcp_fragment(sk, skb, mss_now))
                                        break;
@@ -657,7 +603,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
                        return 0;
                }
 
-               return !tp->packets_out && tp->send_head;
+               return !tp->packets_out && sk->sk_send_head;
        }
        return 0;
 }
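
Note the rewritten return: tcp_write_xmit() reports true exactly when nothing is in flight (tp->packets_out == 0) yet unsent data still sits on the queue, i.e. the peer's window has closed on us. The caller uses this to arm the zero-window probe timer; at this point in the series the call site looks roughly like:

    /* In __tcp_push_pending_frames() (sketch): a true return means
     * "data queued but nothing in flight", so start window probing. */
    if (tcp_write_xmit(sk, nonagle))
            tcp_check_probe_timer(sk, tp);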
@@ -744,17 +690,32 @@ u32 __tcp_select_window(struct sock *sk)
        if (free_space > tp->rcv_ssthresh)
                free_space = tp->rcv_ssthresh;
 
-       /* Get the largest window that is a nice multiple of mss.
-        * Window clamp already applied above.
-        * If our current window offering is within 1 mss of the
-        * free space we just keep it. This prevents the divide
-        * and multiply from happening most of the time.
-        * We also don't do any window rounding when the free space
-        * is too small.
+       /* Don't do rounding if we are using window scaling, since the
+        * scaled window will not line up with the MSS boundary anyway.
         */
        window = tp->rcv_wnd;
-       if (window <= free_space - mss || window > free_space)
-               window = (free_space/mss)*mss;
+       if (tp->rcv_wscale) {
+               window = free_space;
+
+               /* Advertise enough space so that it won't get scaled away.
+        * Important case: prevent zero window announcement if
+                * 1<<rcv_wscale > mss.
+                */
+               if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window)
+                       window = (((window >> tp->rcv_wscale) + 1)
+                                 << tp->rcv_wscale);
+       } else {
+               /* Get the largest window that is a nice multiple of mss.
+                * Window clamp already applied above.
+                * If our current window offering is within 1 mss of the
+                * free space we just keep it. This prevents the divide
+                * and multiply from happening most of the time.
+                * We also don't do any window rounding when the free space
+                * is too small.
+                */
+               if (window <= free_space - mss || window > free_space)
+                       window = (free_space/mss)*mss;
+       }
 
        return window;
 }
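
The rewritten rounding addresses a real failure mode: rounding the window down to an MSS multiple and then shifting right by rcv_wscale can truncate the advertisement to zero whenever 1 << rcv_wscale exceeds the MSS, silently announcing a closed window. With scaling active the code therefore rounds up to the next multiple of 1 << rcv_wscale instead, and skips MSS rounding entirely since a scaled window cannot line up with MSS boundaries anyway. A worked user-space illustration of just the rounding step:

    #include <stdio.h>

    int main(void)
    {
            unsigned int free_space = 500, mss = 536, wscale = 10;

            /* Old rounding: down to an MSS multiple, then scale. */
            unsigned int old = ((free_space / mss) * mss) >> wscale;

            /* New rounding: up to a multiple of 1 << wscale. */
            unsigned int win = free_space;
            if (((win >> wscale) << wscale) != win)
                    win = ((win >> wscale) + 1) << wscale;

            /* Prints "old=0 new=1": the old path advertised a zero
             * window even though 500 bytes of buffer were free. */
            printf("old=%u new=%u\n", old, win >> wscale);
            return 0;
    }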
@@ -827,7 +788,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                 */
                if (tp->fackets_out)
                        tp->fackets_out--;
-               tcp_free_skb(sk, next_skb);
+               sk_stream_free_skb(sk, next_skb);
                tp->packets_out--;
        }
 }
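
sk_stream_free_skb() is the accounting mirror of sk_charge_skb(): it hands the truesize back to the socket's quota before freeing the skb. A sketch, again assuming the struct sock fields of this series:

    /* Undo the charge taken when the skb was queued, then free it. */
    static inline void free_skb_sketch(struct sock *sk,
                                       struct sk_buff *skb)
    {
            sk->sk_wmem_queued -= skb->truesize;
            sk->sk_forward_alloc += skb->truesize;
            __kfree_skb(skb);
    }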
@@ -843,7 +804,7 @@ void tcp_simple_retransmit(struct sock *sk)
        unsigned int mss = tcp_current_mss(sk, 0);
        int lost = 0;
 
-       for_retrans_queue(skb, sk, tp) {
+       sk_stream_for_retrans_queue(skb, sk) {
                if (skb->len > mss && 
                    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
                        if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -929,7 +890,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
        /* Collapse two adjacent packets if worthwhile and we can. */
        if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
           (skb->len < (cur_mss >> 1)) &&
-          (skb->next != tp->send_head) &&
+          (skb->next != sk->sk_send_head) &&
           (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
           (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
           (sysctl_tcp_retrans_collapse != 0))
@@ -963,7 +924,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
        if (err == 0) {
                /* Update global TCP statistics. */
-               TCP_INC_STATS(TcpRetransSegs);
+               TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
 
 #if FASTRETRANS_DEBUG > 0
                if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1004,7 +965,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
        /* First pass: retransmit lost packets. */
        if (packet_cnt) {
-               for_retrans_queue(skb, sk, tp) {
+               sk_stream_for_retrans_queue(skb, sk) {
                        __u8 sacked = TCP_SKB_CB(skb)->sacked;
 
                        if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -1015,9 +976,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                                        if (tcp_retransmit_skb(sk, skb))
                                                return;
                                        if (tp->ca_state != TCP_CA_Loss)
-                                               NET_INC_STATS_BH(TCPFastRetrans);
+                                               NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
                                        else
-                                               NET_INC_STATS_BH(TCPSlowStartRetrans);
+                                               NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
                                        if (skb ==
                                            skb_peek(&sk->sk_write_queue))
@@ -1052,7 +1013,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
        packet_cnt = 0;
 
-       for_retrans_queue(skb, sk, tp) {
+       sk_stream_for_retrans_queue(skb, sk) {
                if(++packet_cnt > tp->fackets_out)
                        break;
 
@@ -1069,7 +1030,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                if (skb == skb_peek(&sk->sk_write_queue))
                        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 
-               NET_INC_STATS_BH(TCPForwardRetrans);
+               NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
        }
 }
 
@@ -1089,7 +1050,7 @@ void tcp_send_fin(struct sock *sk)
         */
        mss_now = tcp_current_mss(sk, 1); 
 
-       if(tp->send_head != NULL) {
+       if (sk->sk_send_head != NULL) {
                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
                TCP_SKB_CB(skb)->end_seq++;
                tp->write_seq++;
@@ -1129,7 +1090,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
        /* NOTE: No TCP options attached and we never retransmit this. */
        skb = alloc_skb(MAX_TCP_HEADER, priority);
        if (!skb) {
-               NET_INC_STATS(TCPAbortFailed);
+               NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
                return;
        }
 
@@ -1144,7 +1105,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
        if (tcp_transmit_skb(sk, skb))
-               NET_INC_STATS(TCPAbortFailed);
+               NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
 
 /* WARNING: This routine must only be called when we have already sent
@@ -1168,8 +1129,8 @@ int tcp_send_synack(struct sock *sk)
                                return -ENOMEM;
                        __skb_unlink(skb, &sk->sk_write_queue);
                        __skb_queue_head(&sk->sk_write_queue, nskb);
-                       tcp_free_skb(sk, skb);
-                       tcp_charge_skb(sk, nskb);
+                       sk_stream_free_skb(sk, skb);
+                       sk_charge_skb(sk, nskb);
                        skb = nskb;
                }
 
@@ -1244,7 +1205,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
        skb->csum = 0;
        th->doff = (tcp_header_size >> 2);
-       TCP_INC_STATS(TcpOutSegs);
+       TCP_INC_STATS(TCP_MIB_OUTSEGS);
        return skb;
 }
 
@@ -1329,10 +1290,10 @@ int tcp_connect(struct sock *sk)
        TCP_SKB_CB(buff)->when = tcp_time_stamp;
        tp->retrans_stamp = TCP_SKB_CB(buff)->when;
        __skb_queue_tail(&sk->sk_write_queue, buff);
-       tcp_charge_skb(sk, buff);
+       sk_charge_skb(sk, buff);
        tp->packets_out++;
        tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
-       TCP_INC_STATS(TcpActiveOpens);
+       TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
        /* Timer for repeating the SYN until an answer. */
        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
@@ -1468,7 +1429,7 @@ int tcp_write_wakeup(struct sock *sk)
                struct tcp_opt *tp = tcp_sk(sk);
                struct sk_buff *skb;
 
-               if ((skb = tp->send_head) != NULL &&
+               if ((skb = sk->sk_send_head) != NULL &&
                    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
                        int err;
                        int mss = tcp_current_mss(sk, 0);
@@ -1522,7 +1483,7 @@ void tcp_send_probe0(struct sock *sk)
 
        err = tcp_write_wakeup(sk);
 
-       if (tp->packets_out || !tp->send_head) {
+       if (tp->packets_out || !sk->sk_send_head) {
                /* Cancel probe timer, if it is not required. */
                tp->probes_out = 0;
                tp->backoff = 0;