vserver 1.9.3
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5e7f70f..ece320e 100644
@@ -555,17 +555,20 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
                tcp_grow_window(sk, tp, skb);
 }
 
-/* Set up a new TCP connection, depending on whether it should be
- * using Vegas or not.
- */    
-void tcp_vegas_init(struct tcp_opt *tp)
+/* When starting a new connection, pin down the current choice of 
+ * congestion algorithm.
+ */
+void tcp_ca_init(struct tcp_opt *tp)
 {
-       if (sysctl_tcp_vegas_cong_avoid) {
-               tp->vegas.do_vegas = 1;
+       if (sysctl_tcp_westwood) 
+               tp->adv_cong = TCP_WESTWOOD;
+       else if (sysctl_tcp_bic)
+               tp->adv_cong = TCP_BIC;
+       else if (sysctl_tcp_vegas_cong_avoid) {
+               tp->adv_cong = TCP_VEGAS;
                tp->vegas.baseRTT = 0x7fffffff;
                tcp_vegas_enable(tp);
-       } else 
-               tcp_vegas_disable(tp);
+       } 
 }
 
 /* Do RTT sampling needed for Vegas.
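tcp_ca_init() above pins tp->adv_cong once at connection establishment, so flipping the sysctls later cannot switch a live flow between algorithms. The predicates the rest of the file keys on (tcp_is_bic() shows up in the bictcp_cwnd() hunk below) are presumably defined along these lines in include/net/tcp.h; a sketch under that assumption, not part of this patch:

static inline int tcp_is_westwood(const struct tcp_opt *tp)
{
	return tp->adv_cong == TCP_WESTWOOD;
}

static inline int tcp_is_bic(const struct tcp_opt *tp)
{
	return tp->adv_cong == TCP_BIC;
}

static inline int tcp_is_vegas(const struct tcp_opt *tp)
{
	return tp->adv_cong == TCP_VEGAS;
}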
@@ -799,10 +802,10 @@ __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst)
        __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
        if (!cwnd) {
-               if (tp->mss_cache > 1460)
+               if (tp->mss_cache_std > 1460)
                        cwnd = 2;
                else
-                       cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+                       cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
        }
        return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
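The buckets above track RFC 2414's larger-initial-window formula, min(4*MSS, max(2*MSS, 4380 bytes)), expressed in whole segments: the common 1460-byte Ethernet MSS gives cwnd = 3 (3 * 1460 = 4380 bytes), a 536-byte MSS gives cwnd = 4, and anything larger than 1460 is held to cwnd = 2, always capped by tp->snd_cwnd_clamp. The switch to mss_cache_std matters under TSO, where tp->mss_cache may be inflated well past the true per-segment MSS and would otherwise bias the bucket choice.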
@@ -852,8 +855,10 @@ static void tcp_init_metrics(struct sock *sk)
         * to low value, and then abruptly stops to do it and starts to delay
         * ACKs, wait for troubles.
         */
-       if (dst_metric(dst, RTAX_RTT) > tp->srtt)
+       if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
                tp->srtt = dst_metric(dst, RTAX_RTT);
+               tp->rtt_seq = tp->snd_nxt;
+       }
        if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
                tp->mdev = dst_metric(dst, RTAX_RTTVAR);
                tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
@@ -895,7 +900,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
 #if FASTRETRANS_DEBUG > 1
                printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
                       tp->sack_ok, tp->ca_state,
-                      tp->reordering, tp->fackets_out, tp->sacked_out,
+                      tp->reordering,
+                      tcp_get_pcount(&tp->fackets_out),
+                      tcp_get_pcount(&tp->sacked_out),
                       tp->undo_marker ? tp->undo_retrans : 0);
 #endif
                /* Disable FACK yet. */
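The tcp_get_pcount()/tcp_set_pcount() calls that first appear here are the heart of this patch: every outstanding-segment counter (packets_out, sacked_out, lost_out, fackets_out, retrans_out, left_out) becomes a tcp_pcount_t denominated in MSS-sized segments rather than skbs, so that one TSO super-packet can account for many packets in flight. Assuming tcp_pcount_t simply wraps a __u32 and the per-skb segment count sits in the shared info (both assumptions; the real definitions belong to include/net/tcp.h in this tree), the helpers would look roughly like:

typedef struct { __u32 val; } tcp_pcount_t;	/* assumed layout */

static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	/* number of MSS-sized segments this (possibly TSO) skb covers */
	return skb_shinfo(skb)->tso_segs;
}

static inline __u32 tcp_get_pcount(const tcp_pcount_t *count)
{
	return count->val;
}

static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
{
	count->val = val;
}

static inline void tcp_inc_pcount(tcp_pcount_t *count,
				  const struct sk_buff *skb)
{
	count->val += tcp_skb_pcount(skb);
}

static inline void tcp_dec_pcount(tcp_pcount_t *count,
				  const struct sk_buff *skb)
{
	count->val -= tcp_skb_pcount(skb);
}

static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
{
	count->val += amt;
}

static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
{
	count->val -= amt;
}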
@@ -958,7 +965,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
        unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
        struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
        int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
-       int reord = tp->packets_out;
+       int reord = tcp_get_pcount(&tp->packets_out);
        int prior_fackets;
        u32 lost_retrans = 0;
        int flag = 0;
@@ -972,9 +979,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                tp->mss_cache = tp->mss_cache_std;
        }
 
-       if (!tp->sacked_out)
-               tp->fackets_out = 0;
-       prior_fackets = tp->fackets_out;
+       if (!tcp_get_pcount(&tp->sacked_out))
+               tcp_set_pcount(&tp->fackets_out, 0);
+       prior_fackets = tcp_get_pcount(&tp->fackets_out);
 
        for (i=0; i<num_sacks; i++, sp++) {
                struct sk_buff *skb;
@@ -1028,7 +1035,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                        if(!before(TCP_SKB_CB(skb)->seq, end_seq))
                                break;
 
-                       fack_count++;
+                       fack_count += tcp_skb_pcount(skb);
 
                        in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
                                !before(end_seq, TCP_SKB_CB(skb)->end_seq);
@@ -1072,8 +1079,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                         */
                                        if (sacked & TCPCB_LOST) {
                                                TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
-                                               tp->lost_out--;
-                                               tp->retrans_out--;
+                                               tcp_dec_pcount(&tp->lost_out, skb);
+                                               tcp_dec_pcount(&tp->retrans_out, skb);
                                        }
                                } else {
                                        /* New sack for not retransmitted frame,
@@ -1085,16 +1092,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
                                        if (sacked & TCPCB_LOST) {
                                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-                                               tp->lost_out--;
+                                               tcp_dec_pcount(&tp->lost_out, skb);
                                        }
                                }
 
                                TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
                                flag |= FLAG_DATA_SACKED;
-                               tp->sacked_out++;
+                               tcp_inc_pcount(&tp->sacked_out, skb);
 
-                               if (fack_count > tp->fackets_out)
-                                       tp->fackets_out = fack_count;
+                               if (fack_count > tcp_get_pcount(&tp->fackets_out))
+                                       tcp_set_pcount(&tp->fackets_out, fack_count);
                        } else {
                                if (dup_sack && (sacked&TCPCB_RETRANS))
                                        reord = min(fack_count, reord);
@@ -1108,7 +1115,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                        if (dup_sack &&
                            (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-                               tp->retrans_out--;
+                               tcp_dec_pcount(&tp->retrans_out, skb);
                        }
                }
        }
@@ -1132,12 +1139,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                            (IsFack(tp) ||
                             !before(lost_retrans,
                                     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
-                                    tp->mss_cache))) {
+                                    tp->mss_cache_std))) {
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-                               tp->retrans_out--;
+                               tcp_dec_pcount(&tp->retrans_out, skb);
 
                                if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
-                                       tp->lost_out++;
+                                       tcp_inc_pcount(&tp->lost_out, skb);
                                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                                        flag |= FLAG_DATA_SACKED;
                                        NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
@@ -1146,15 +1153,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                }
        }
 
-       tp->left_out = tp->sacked_out + tp->lost_out;
+       tcp_set_pcount(&tp->left_out,
+                      (tcp_get_pcount(&tp->sacked_out) +
+                       tcp_get_pcount(&tp->lost_out)));
 
-       if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
-               tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0);
+       if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
+           tp->ca_state != TCP_CA_Loss)
+               tcp_update_reordering(tp,
+                                     ((tcp_get_pcount(&tp->fackets_out) + 1) -
+                                      reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-       BUG_TRAP((int)tp->sacked_out >= 0);
-       BUG_TRAP((int)tp->lost_out >= 0);
-       BUG_TRAP((int)tp->retrans_out >= 0);
+       BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+       BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+       BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
        BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
 #endif
        return flag;
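One consequence of the hunk above worth spelling out: fack_count now advances by tcp_skb_pcount(skb) rather than by one, so reord, and through it the tp->reordering estimate passed to tcp_update_reordering(), is measured in MSS-sized segments instead of skbs. That keeps it commensurate with the pcount-based packets_out it is initialized from, and with the segment-denominated budget tcp_mark_head_lost() consumes later.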
@@ -1184,7 +1196,7 @@ void tcp_enter_frto(struct sock *sk)
         * If something was really lost, it is eventually caught up
         * in tcp_enter_frto_loss.
         */
-       tp->retrans_out = 0;
+       tcp_set_pcount(&tp->retrans_out, 0);
        tp->undo_marker = tp->snd_una;
        tp->undo_retrans = 0;
 
@@ -1207,26 +1219,26 @@ static void tcp_enter_frto_loss(struct sock *sk)
        struct sk_buff *skb;
        int cnt = 0;
 
-       tp->sacked_out = 0;
-       tp->lost_out = 0;
-       tp->fackets_out = 0;
+       tcp_set_pcount(&tp->sacked_out, 0);
+       tcp_set_pcount(&tp->lost_out, 0);
+       tcp_set_pcount(&tp->fackets_out, 0);
 
        sk_stream_for_retrans_queue(skb, sk) {
-               cnt++;
+               cnt += tcp_skb_pcount(skb);
                TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 
                        /* Do not mark those segments lost that were
                         * forward transmitted after RTO
                         */
-                       if(!after(TCP_SKB_CB(skb)->end_seq,
+                       if (!after(TCP_SKB_CB(skb)->end_seq,
                                   tp->frto_highmark)) {
                                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                               tp->lost_out++;
+                               tcp_inc_pcount(&tp->lost_out, skb);
                        }
                } else {
-                       tp->sacked_out++;
-                       tp->fackets_out = cnt;
+                       tcp_inc_pcount(&tp->sacked_out, skb);
+                       tcp_set_pcount(&tp->fackets_out, cnt);
                }
        }
        tcp_sync_left_out(tp);
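tcp_enter_frto_loss() above (like tcp_enter_loss() below) finishes by calling tcp_sync_left_out(tp) to reconcile the counters it just rewrote. Under the same tcp_pcount_t assumption as the sketch earlier, its post-conversion shape would be roughly:

static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
	/* sacked_out may not exceed what is in flight and not lost */
	if (tp->sack_ok &&
	    tcp_get_pcount(&tp->sacked_out) >=
	    tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out))
		tcp_set_pcount(&tp->sacked_out,
			       tcp_get_pcount(&tp->packets_out) -
			       tcp_get_pcount(&tp->lost_out));

	/* left_out = sacked_out + lost_out, as the SACK hunk above computes */
	tcp_set_pcount(&tp->left_out,
		       tcp_get_pcount(&tp->sacked_out) +
		       tcp_get_pcount(&tp->lost_out));
}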
@@ -1248,12 +1260,12 @@ static void tcp_enter_frto_loss(struct sock *sk)
 
 void tcp_clear_retrans(struct tcp_opt *tp)
 {
-       tp->left_out = 0;
-       tp->retrans_out = 0;
+       tcp_set_pcount(&tp->left_out, 0);
+       tcp_set_pcount(&tp->retrans_out, 0);
 
-       tp->fackets_out = 0;
-       tp->sacked_out = 0;
-       tp->lost_out = 0;
+       tcp_set_pcount(&tp->fackets_out, 0);
+       tcp_set_pcount(&tp->sacked_out, 0);
+       tcp_set_pcount(&tp->lost_out, 0);
 
        tp->undo_marker = 0;
        tp->undo_retrans = 0;
@@ -1287,17 +1299,17 @@ void tcp_enter_loss(struct sock *sk, int how)
                tp->undo_marker = tp->snd_una;
 
        sk_stream_for_retrans_queue(skb, sk) {
-               cnt++;
+               cnt += tcp_skb_pcount(skb);
                if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
                        tp->undo_marker = 0;
                TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
                if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                       tp->lost_out++;
+                       tcp_inc_pcount(&tp->lost_out, skb);
                } else {
-                       tp->sacked_out++;
-                       tp->fackets_out = cnt;
+                       tcp_inc_pcount(&tp->sacked_out, skb);
+                       tcp_set_pcount(&tp->fackets_out, cnt);
                }
        }
        tcp_sync_left_out(tp);
@@ -1334,7 +1346,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
 
 static inline int tcp_fackets_out(struct tcp_opt *tp)
 {
-       return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+       return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 :
+               tcp_get_pcount(&tp->fackets_out);
 }
 
 static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
@@ -1344,7 +1357,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
 
 static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
 {
-       return tp->packets_out &&
+       return tcp_get_pcount(&tp->packets_out) &&
               tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
 }
 
@@ -1444,8 +1457,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
 static int
 tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
 {
+       __u32 packets_out;
+
        /* Trick#1: The loss is proven. */
-       if (tp->lost_out)
+       if (tcp_get_pcount(&tp->lost_out))
                return 1;
 
        /* Not-A-Trick#2 : Classic rule... */
@@ -1461,8 +1476,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
        /* Trick#4: It is still not OK... But will it be useful to delay
         * recovery more?
         */
-       if (tp->packets_out <= tp->reordering &&
-           tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) &&
+       packets_out = tcp_get_pcount(&tp->packets_out);
+       if (packets_out <= tp->reordering &&
+           tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
            !tcp_may_send_now(sk, tp)) {
                /* We have nothing to send. This connection is limited
                 * either by receiver window or by application.
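Trick#4 covers the small-window case that Trick#2 can never catch: when packets_out <= tp->reordering (default 3), fackets_out cannot exceed the reordering threshold, so recovery is instead entered once at least half of the in-flight segments, and no fewer than sysctl_tcp_reordering, have been SACKed and tcp_may_send_now() confirms nothing more can be transmitted to coax out further dupacks.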
@@ -1481,12 +1497,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
 {
        u32 holes;
 
-       holes = max(tp->lost_out, 1U);
-       holes = min(holes, tp->packets_out);
+       holes = max(tcp_get_pcount(&tp->lost_out), 1U);
+       holes = min(holes, tcp_get_pcount(&tp->packets_out));
 
-       if (tp->sacked_out + holes > tp->packets_out) {
-               tp->sacked_out = tp->packets_out - holes;
-               tcp_update_reordering(tp, tp->packets_out+addend, 0);
+       if ((tcp_get_pcount(&tp->sacked_out) + holes) >
+           tcp_get_pcount(&tp->packets_out)) {
+               tcp_set_pcount(&tp->sacked_out,
+                              (tcp_get_pcount(&tp->packets_out) - holes));
+               tcp_update_reordering(tp,
+                                     tcp_get_pcount(&tp->packets_out)+addend,
+                                     0);
        }
 }
 
@@ -1494,7 +1514,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
 
 static void tcp_add_reno_sack(struct tcp_opt *tp)
 {
-       ++tp->sacked_out;
+       tcp_inc_pcount_explicit(&tp->sacked_out, 1);
        tcp_check_reno_reordering(tp, 0);
        tcp_sync_left_out(tp);
 }
@@ -1505,10 +1525,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
 {
        if (acked > 0) {
                /* One ACK acked hole. The rest eat duplicate ACKs. */
-               if (acked-1 >= tp->sacked_out)
-                       tp->sacked_out = 0;
+               if (acked-1 >= tcp_get_pcount(&tp->sacked_out))
+                       tcp_set_pcount(&tp->sacked_out, 0);
                else
-                       tp->sacked_out -= acked-1;
+                       tcp_dec_pcount_explicit(&tp->sacked_out, acked-1);
        }
        tcp_check_reno_reordering(tp, acked);
        tcp_sync_left_out(tp);
@@ -1516,8 +1536,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
 
 static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
 {
-       tp->sacked_out = 0;
-       tp->left_out = tp->lost_out;
+       tcp_set_pcount(&tp->sacked_out, 0);
+       tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out));
 }
 
 /* Mark head of queue up as lost. */
@@ -1527,14 +1547,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
        struct sk_buff *skb;
        int cnt = packets;
 
-       BUG_TRAP(cnt <= tp->packets_out);
+       BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out));
 
        sk_stream_for_retrans_queue(skb, sk) {
-               if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+               cnt -= tcp_skb_pcount(skb);
+               if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
                        break;
                if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                       tp->lost_out++;
+                       tcp_inc_pcount(&tp->lost_out, skb);
                }
        }
        tcp_sync_left_out(tp);
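Note the boundary behaviour the conversion above chooses: cnt is charged the whole tcp_skb_pcount() before the test, so with packets = 3 and a queue of two-segment TSO skbs the first skb leaves cnt = 1 and is marked lost, while the second drives cnt to -1 and breaks out of the loop unmarked. An skb straddling the budget is skipped whole rather than split, the conservative option given that TCPCB_LOST is a per-skb, not a per-segment, tag.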
@@ -1545,7 +1566,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
 static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
 {
        if (IsFack(tp)) {
-               int lost = tp->fackets_out - tp->reordering;
+               int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering;
                if (lost <= 0)
                        lost = 1;
                tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
@@ -1565,7 +1586,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
                        if (tcp_skb_timedout(tp, skb) &&
                            !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
                                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                               tp->lost_out++;
+                               tcp_inc_pcount(&tp->lost_out, skb);
                        }
                }
                tcp_sync_left_out(tp);
@@ -1630,8 +1651,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
        printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
               msg,
               NIPQUAD(inet->daddr), ntohs(inet->dport),
-              tp->snd_cwnd, tp->left_out,
-              tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
+              tp->snd_cwnd, tcp_get_pcount(&tp->left_out),
+              tp->snd_ssthresh, tp->prior_ssthresh,
+              tcp_get_pcount(&tp->packets_out));
 }
 #else
 #define DBGUNDO(x...) do { } while (0)
@@ -1701,13 +1723,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
 static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
 {
        /* Partial ACK arrived. Force Hoe's retransmit. */
-       int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+       int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering;
 
        if (tcp_may_undo(tp)) {
                /* Plain luck! Hole is filled with delayed
                 * packet, rather than with a retransmit.
                 */
-               if (tp->retrans_out == 0)
+               if (tcp_get_pcount(&tp->retrans_out) == 0)
                        tp->retrans_stamp = 0;
 
                tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
@@ -1734,8 +1756,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                }
                DBGUNDO(sk, tp, "partial loss");
-               tp->lost_out = 0;
-               tp->left_out = tp->sacked_out;
+               tcp_set_pcount(&tp->lost_out, 0);
+               tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
                tcp_undo_cwr(tp, 1);
                NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
                tp->retransmits = 0;
@@ -1758,9 +1780,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
 
 static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
 {
-       tp->left_out = tp->sacked_out;
+       tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
 
-       if (tp->retrans_out == 0)
+       if (tcp_get_pcount(&tp->retrans_out) == 0)
                tp->retrans_stamp = 0;
 
        if (flag&FLAG_ECE)
@@ -1769,8 +1791,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
        if (tp->ca_state != TCP_CA_CWR) {
                int state = TCP_CA_Open;
 
-               if (tp->left_out ||
-                   tp->retrans_out ||
+               if (tcp_get_pcount(&tp->left_out) ||
+                   tcp_get_pcount(&tp->retrans_out) ||
                    tp->undo_marker)
                        state = TCP_CA_Disorder;
 
@@ -1804,11 +1826,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 
        /* Some technical things:
         * 1. Reno does not count dupacks (sacked_out) automatically. */
-       if (!tp->packets_out)
-               tp->sacked_out = 0;
+       if (!tcp_get_pcount(&tp->packets_out))
+               tcp_set_pcount(&tp->sacked_out, 0);
         /* 2. SACK counts snd_fack in packets inaccurately. */
-       if (tp->sacked_out == 0)
-               tp->fackets_out = 0;
+       if (tcp_get_pcount(&tp->sacked_out) == 0)
+               tcp_set_pcount(&tp->fackets_out, 0);
 
         /* Now state machine starts.
         * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -1816,15 +1838,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                tp->prior_ssthresh = 0;
 
        /* B. In all the states check for reneging SACKs. */
-       if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+       if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp))
                return;
 
        /* C. Process data loss notification, provided it is valid. */
        if ((flag&FLAG_DATA_LOST) &&
            before(tp->snd_una, tp->high_seq) &&
            tp->ca_state != TCP_CA_Open &&
-           tp->fackets_out > tp->reordering) {
-               tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+           tcp_get_pcount(&tp->fackets_out) > tp->reordering) {
+               tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq);
                NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
        }
 
@@ -1835,7 +1857,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
         *    when high_seq is ACKed. */
        if (tp->ca_state == TCP_CA_Open) {
                if (!sysctl_tcp_frto)
-                       BUG_TRAP(tp->retrans_out == 0);
+                       BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0);
                tp->retrans_stamp = 0;
        } else if (!before(tp->snd_una, tp->high_seq)) {
                switch (tp->ca_state) {
@@ -1882,7 +1904,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                        if (IsReno(tp) && is_dupack)
                                tcp_add_reno_sack(tp);
                } else {
-                       int acked = prior_packets - tp->packets_out;
+                       int acked = prior_packets -
+                               tcp_get_pcount(&tp->packets_out);
                        if (IsReno(tp))
                                tcp_remove_reno_sacks(sk, tp, acked);
                        is_dupack = tcp_try_undo_partial(sk, tp, acked);
@@ -1925,7 +1948,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                tp->high_seq = tp->snd_nxt;
                tp->prior_ssthresh = 0;
                tp->undo_marker = tp->snd_una;
-               tp->undo_retrans = tp->retrans_out;
+               tp->undo_retrans = tcp_get_pcount(&tp->retrans_out);
 
                if (tp->ca_state < TCP_CA_CWR) {
                        if (!(flag&FLAG_ECE))
@@ -2019,7 +2042,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
 static inline __u32 bictcp_cwnd(struct tcp_opt *tp)
 {
        /* original Reno behaviour */
-       if (!sysctl_tcp_bic)
+       if (!tcp_is_bic(tp))
                return tp->snd_cwnd;
 
        if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
@@ -2154,7 +2177,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
                 * is the cwnd during the previous RTT.
                 */
                old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
-                       tp->mss_cache;
+                       tp->mss_cache_std;
                old_snd_cwnd = tp->vegas.beg_snd_cwnd;
 
                /* Save the extent of the current window so we can use this
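The divisor change above follows the same rule as the other mss_cache -> mss_cache_std conversions in this patch: under TSO, tp->mss_cache can be inflated to describe a multi-segment super-packet (hence the reset to mss_cache_std at the top of tcp_sacktag_write_queue earlier), while mss_cache_std stays the true per-segment MSS, which is what converting the previous RTT's sequence-space window into a segment count requires.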
@@ -2325,13 +2348,89 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
 
 static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
 {
-       if (tp->packets_out==0) {
+       if (!tcp_get_pcount(&tp->packets_out)) {
                tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
        } else {
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
        }
 }
 
+/* There is one downside to this scheme.  Although we keep the
+ * ACK clock ticking, adjusting packet counters and advancing
+ * congestion window, we do not liberate socket send buffer
+ * space.
+ *
+ * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
+ * then making a write space wakeup callback is a possible
+ * future enhancement.  WARNING: it is not trivial to make.
+ */
+static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
+                        __u32 now, __s32 *seq_rtt)
+{
+       struct tcp_opt *tp = tcp_sk(sk);
+       struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
+       __u32 mss = tcp_skb_mss(skb);
+       __u32 snd_una = tp->snd_una;
+       __u32 orig_seq, seq;
+       __u32 packets_acked = 0;
+       int acked = 0;
+
+       /* If we get here, the whole TSO packet has not been
+        * acked.
+        */
+       BUG_ON(!after(scb->end_seq, snd_una));
+
+       seq = orig_seq = scb->seq;
+       while (!after(seq + mss, snd_una)) {
+               packets_acked++;
+               seq += mss;
+       }
+
+       if (tcp_trim_head(sk, skb, (seq - orig_seq)))
+               return 0;
+
+       if (packets_acked) {
+               __u8 sacked = scb->sacked;
+
+               acked |= FLAG_DATA_ACKED;
+               if (sacked) {
+                       if (sacked & TCPCB_RETRANS) {
+                               if (sacked & TCPCB_SACKED_RETRANS)
+                                       tcp_dec_pcount_explicit(&tp->retrans_out,
+                                                               packets_acked);
+                               acked |= FLAG_RETRANS_DATA_ACKED;
+                               *seq_rtt = -1;
+                       } else if (*seq_rtt < 0)
+                               *seq_rtt = now - scb->when;
+                       if (sacked & TCPCB_SACKED_ACKED)
+                               tcp_dec_pcount_explicit(&tp->sacked_out,
+                                                       packets_acked);
+                       if (sacked & TCPCB_LOST)
+                               tcp_dec_pcount_explicit(&tp->lost_out,
+                                                       packets_acked);
+                       if (sacked & TCPCB_URG) {
+                               if (tp->urg_mode &&
+                                   !before(seq, tp->snd_up))
+                                       tp->urg_mode = 0;
+                       }
+               } else if (*seq_rtt < 0)
+                       *seq_rtt = now - scb->when;
+
+               if (tcp_get_pcount(&tp->fackets_out)) {
+                       __u32 dval = min(tcp_get_pcount(&tp->fackets_out),
+                                        packets_acked);
+                       tcp_dec_pcount_explicit(&tp->fackets_out, dval);
+               }
+               tcp_dec_pcount_explicit(&tp->packets_out, packets_acked);
+
+               BUG_ON(tcp_skb_pcount(skb) == 0);
+               BUG_ON(!before(scb->seq, scb->end_seq));
+       }
+
+       return acked;
+}
+
+
 /* Remove acknowledged frames from the retransmission queue. */
 static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
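To make the partial-ACK loop in tcp_tso_acked() concrete, with hypothetical numbers: a TSO skb spanning seq 1000..8240 with an mss of 1448 carries five segments. An ACK advancing snd_una to 5344 walks seq through 2448, 3896 and 5344, none of them after snd_una, so packets_acked = 3; the next candidate, 6792, overshoots and ends the loop. tcp_trim_head() then removes the 4344 acked bytes from the head of the skb, the sacked/lost/retrans/packets counters are debited by three where applicable, and the remaining two segments stay queued until later ACKs cover them.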
@@ -2341,7 +2440,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
        int acked = 0;
        __s32 seq_rtt = -1;
 
-       while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) {
+       while ((skb = skb_peek(&sk->sk_write_queue)) &&
+              skb != sk->sk_send_head) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
                __u8 sacked = scb->sacked;
 
@@ -2349,8 +2449,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
                 * discard it as it's confirmed to have arrived at
                 * the other end.
                 */
-               if (after(scb->end_seq, tp->snd_una))
+               if (after(scb->end_seq, tp->snd_una)) {
+                       if (tcp_skb_pcount(skb) > 1)
+                               acked |= tcp_tso_acked(sk, skb,
+                                                      now, &seq_rtt);
                        break;
+               }
 
                /* Initial outgoing SYN's get put onto the write_queue
                 * just like anything else we transmit.  It is not
@@ -2359,7 +2463,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
                 * connection startup slow start one packet too
                 * quickly.  This is severely frowned upon behavior.
                 */
-               if(!(scb->flags & TCPCB_FLAG_SYN)) {
+               if (!(scb->flags & TCPCB_FLAG_SYN)) {
                        acked |= FLAG_DATA_ACKED;
                } else {
                        acked |= FLAG_SYN_ACKED;
@@ -2367,27 +2471,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
                }
 
                if (sacked) {
-                       if(sacked & TCPCB_RETRANS) {
+                       if (sacked & TCPCB_RETRANS) {
                                if(sacked & TCPCB_SACKED_RETRANS)
-                                       tp->retrans_out--;
+                                       tcp_dec_pcount(&tp->retrans_out, skb);
                                acked |= FLAG_RETRANS_DATA_ACKED;
                                seq_rtt = -1;
                        } else if (seq_rtt < 0)
                                seq_rtt = now - scb->when;
-                       if(sacked & TCPCB_SACKED_ACKED)
-                               tp->sacked_out--;
-                       if(sacked & TCPCB_LOST)
-                               tp->lost_out--;
-                       if(sacked & TCPCB_URG) {
+                       if (sacked & TCPCB_SACKED_ACKED)
+                               tcp_dec_pcount(&tp->sacked_out, skb);
+                       if (sacked & TCPCB_LOST)
+                               tcp_dec_pcount(&tp->lost_out, skb);
+                       if (sacked & TCPCB_URG) {
                                if (tp->urg_mode &&
                                    !before(scb->end_seq, tp->snd_up))
                                        tp->urg_mode = 0;
                        }
                } else if (seq_rtt < 0)
                        seq_rtt = now - scb->when;
-               if (tp->fackets_out)
-                       tp->fackets_out--;
-               tp->packets_out--;
+               tcp_dec_pcount_approx(&tp->fackets_out, skb);
+               tcp_packets_out_dec(tp, skb);
                __skb_unlink(skb, skb->list);
                sk_stream_free_skb(sk, skb);
        }
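The two helpers introduced just above differ from plain tcp_dec_pcount(): fackets_out is only an estimate of how far forward the SACKs reach, so its decrement has to saturate at zero rather than trust tcp_skb_pcount() exactly. Continuing the tcp_pcount_t sketch from earlier (same assumptions):

static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
					 const struct sk_buff *skb)
{
	if (count->val) {
		count->val -= tcp_skb_pcount(skb);
		if ((int)count->val < 0)
			count->val = 0;
	}
}

static inline void tcp_packets_out_dec(struct tcp_opt *tp,
				       const struct sk_buff *skb)
{
	tcp_dec_pcount(&tp->packets_out, skb);
}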
@@ -2398,24 +2501,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
        }
 
 #if FASTRETRANS_DEBUG > 0
-       BUG_TRAP((int)tp->sacked_out >= 0);
-       BUG_TRAP((int)tp->lost_out >= 0);
-       BUG_TRAP((int)tp->retrans_out >= 0);
-       if (!tp->packets_out && tp->sack_ok) {
-               if (tp->lost_out) {
-                       printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out,
-                                                           tp->ca_state);
-                       tp->lost_out = 0;
+       BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+       BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+       BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
+       if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
+               if (tcp_get_pcount(&tp->lost_out)) {
+                       printk(KERN_DEBUG "Leak l=%u %d\n",
+                              tcp_get_pcount(&tp->lost_out),
+                              tp->ca_state);
+                       tcp_set_pcount(&tp->lost_out, 0);
                }
-               if (tp->sacked_out) {
-                       printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out,
-                                                           tp->ca_state);
-                       tp->sacked_out = 0;
+               if (tcp_get_pcount(&tp->sacked_out)) {
+                       printk(KERN_DEBUG "Leak s=%u %d\n",
+                              tcp_get_pcount(&tp->sacked_out),
+                              tp->ca_state);
+                       tcp_set_pcount(&tp->sacked_out, 0);
                }
-               if (tp->retrans_out) {
-                       printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out,
-                                                           tp->ca_state);
-                       tp->retrans_out = 0;
+               if (tcp_get_pcount(&tp->retrans_out)) {
+                       printk(KERN_DEBUG "Leak r=%u %d\n",
+                              tcp_get_pcount(&tp->retrans_out),
+                              tp->ca_state);
+                       tcp_set_pcount(&tp->retrans_out, 0);
                }
        }
 #endif
@@ -2594,18 +2700,16 @@ static void westwood_filter(struct sock *sk, __u32 delta)
  * WESTWOOD_RTT_MIN minimum bound since we could be on a LAN!
  */
 
-static inline __u32 westwood_update_rttmin(struct sock *sk)
+static inline __u32 westwood_update_rttmin(const struct sock *sk)
 {
-       struct tcp_opt *tp = tcp_sk(sk);
+       const struct tcp_opt *tp = tcp_sk(sk);
        __u32 rttmin = tp->westwood.rtt_min;
 
-       if (tp->westwood.rtt == 0)
-               return(rttmin);
-
-       if (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin)
+       if (tp->westwood.rtt != 0 &&
+           (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin))
                rttmin = tp->westwood.rtt;
 
-       return(rttmin);
+       return rttmin;
 }
 
 /*
@@ -2613,11 +2717,11 @@ static inline __u32 westwood_update_rttmin(struct sock *sk)
  * Evaluate increases for dk. 
  */
 
-static inline __u32 westwood_acked(struct sock *sk)
+static inline __u32 westwood_acked(const struct sock *sk)
 {
-       struct tcp_opt *tp = tcp_sk(sk);
+       const struct tcp_opt *tp = tcp_sk(sk);
 
-       return ((tp->snd_una) - (tp->westwood.snd_una));
+       return tp->snd_una - tp->westwood.snd_una;
 }
 
 /*
@@ -2629,9 +2733,9 @@ static inline __u32 westwood_acked(struct sock *sk)
  * window, 1 if the sample has to be considered in the next window.
  */
 
-static int westwood_new_window(struct sock *sk)
+static int westwood_new_window(const struct sock *sk)
 {
-       struct tcp_opt *tp = tcp_sk(sk);
+       const struct tcp_opt *tp = tcp_sk(sk);
        __u32 left_bound;
        __u32 rtt;
        int ret = 0;
@@ -2665,14 +2769,13 @@ static void __westwood_update_window(struct sock *sk, __u32 now)
        struct tcp_opt *tp = tcp_sk(sk);
        __u32 delta = now - tp->westwood.rtt_win_sx;
 
-        if (!delta)
-                return;
-
-       if (tp->westwood.rtt)
-                westwood_filter(sk, delta);
+        if (delta) {
+               if (tp->westwood.rtt)
+                       westwood_filter(sk, delta);
 
-        tp->westwood.bk = 0;
-        tp->westwood.rtt_win_sx = tcp_time_stamp;
+               tp->westwood.bk = 0;
+               tp->westwood.rtt_win_sx = tcp_time_stamp;
+       }
 }
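For context, westwood_filter() called above is untouched by this patch beyond the caller restructuring; it feeds Westwood's exponential bandwidth filter with the bytes-acked count bk accumulated over the delta since the last RTT window. Presumably (a sketch, with field names assumed from this tree) it is the usual 7/8 smoothing:

static inline __u32 westwood_do_filter(__u32 a, __u32 b)
{
	return (((7 * a) + b) >> 3);	/* new = 7/8 old + 1/8 sample */
}

static void westwood_filter(struct sock *sk, __u32 delta)
{
	struct tcp_opt *tp = tcp_sk(sk);

	/* instantaneous bandwidth sample: bytes acked per unit time */
	tp->westwood.bw_ns_est =
		westwood_do_filter(tp->westwood.bw_ns_est,
				   tp->westwood.bk / delta);
	/* second-stage smoothing for the long-term estimate */
	tp->westwood.bw_est =
		westwood_do_filter(tp->westwood.bw_est,
				   tp->westwood.bw_ns_est);
}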
 
 
@@ -2710,19 +2813,19 @@ static void westwood_dupack_update(struct sock *sk)
 {
        struct tcp_opt *tp = tcp_sk(sk);
 
-       tp->westwood.accounted += tp->mss_cache;
-       tp->westwood.cumul_ack = tp->mss_cache;
+       tp->westwood.accounted += tp->mss_cache_std;
+       tp->westwood.cumul_ack = tp->mss_cache_std;
 }
 
 static inline int westwood_may_change_cumul(struct tcp_opt *tp)
 {
-       return ((tp->westwood.cumul_ack) > tp->mss_cache);
+       return (tp->westwood.cumul_ack > tp->mss_cache_std);
 }
 
 static inline void westwood_partial_update(struct tcp_opt *tp)
 {
        tp->westwood.accounted -= tp->westwood.cumul_ack;
-       tp->westwood.cumul_ack = tp->mss_cache;
+       tp->westwood.cumul_ack = tp->mss_cache_std;
 }
 
 static inline void westwood_complete_update(struct tcp_opt *tp)
@@ -2737,7 +2840,7 @@ static inline void westwood_complete_update(struct tcp_opt *tp)
  * delayed or partial acks.
  */
 
-static __u32 westwood_acked_count(struct sock *sk)
+static inline __u32 westwood_acked_count(struct sock *sk)
 {
        struct tcp_opt *tp = tcp_sk(sk);
 
@@ -2751,7 +2854,7 @@ static __u32 westwood_acked_count(struct sock *sk)
 
         if (westwood_may_change_cumul(tp)) {
                /* Partial or delayed ack */
-               if ((tp->westwood.accounted) >= (tp->westwood.cumul_ack))
+               if (tp->westwood.accounted >= tp->westwood.cumul_ack)
                        westwood_partial_update(tp);
                else
                        westwood_complete_update(tp);
@@ -2833,7 +2936,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         */
        sk->sk_err_soft = 0;
        tp->rcv_tstamp = tcp_time_stamp;
-       prior_packets = tp->packets_out;
+       prior_packets = tcp_get_pcount(&tp->packets_out);
        if (!prior_packets)
                goto no_queue;
 
@@ -3855,11 +3958,11 @@ static void tcp_new_space(struct sock *sk)
 {
        struct tcp_opt *tp = tcp_sk(sk);
 
-       if (tp->packets_out < tp->snd_cwnd &&
+       if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd &&
            !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
            !tcp_memory_pressure &&
            atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
-               int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
+               int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) +
                        MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
                    demanded = max_t(unsigned int, tp->snd_cwnd,
                                                   tp->reordering + 1);