X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_input.c;h=ece320e60f710cce99173133d7e4a4a39dbff113;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=5e7f70f1c9403f13980b9fd58dac355c51882196;hpb=a2c21200f1c81b08cb55e417b68150bba439b646;p=linux-2.6.git

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5e7f70f1c..ece320e60 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -555,17 +555,20 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
 	tcp_grow_window(sk, tp, skb);
 }
 
-/* Set up a new TCP connection, depending on whether it should be
- * using Vegas or not.
- */
-void tcp_vegas_init(struct tcp_opt *tp)
+/* When starting a new connection, pin down the current choice of
+ * congestion algorithm.
+ */
+void tcp_ca_init(struct tcp_opt *tp)
 {
-	if (sysctl_tcp_vegas_cong_avoid) {
-		tp->vegas.do_vegas = 1;
+	if (sysctl_tcp_westwood)
+		tp->adv_cong = TCP_WESTWOOD;
+	else if (sysctl_tcp_bic)
+		tp->adv_cong = TCP_BIC;
+	else if (sysctl_tcp_vegas_cong_avoid) {
+		tp->adv_cong = TCP_VEGAS;
 		tp->vegas.baseRTT = 0x7fffffff;
 		tcp_vegas_enable(tp);
-	} else
-		tcp_vegas_disable(tp);
+	}
 }
 
 /* Do RTT sampling needed for Vegas.
@@ -799,10 +802,10 @@ __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd) {
-		if (tp->mss_cache > 1460)
+		if (tp->mss_cache_std > 1460)
 			cwnd = 2;
 		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+			cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
 	}
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
@@ -852,8 +855,10 @@ static void tcp_init_metrics(struct sock *sk)
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	if (dst_metric(dst, RTAX_RTT) > tp->srtt)
+	if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
 		tp->srtt = dst_metric(dst, RTAX_RTT);
+		tp->rtt_seq = tp->snd_nxt;
+	}
 	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
 		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 	}
@@ -895,7 +900,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
 #if FASTRETRANS_DEBUG > 1
 		printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
 		       tp->sack_ok, tp->ca_state,
-		       tp->reordering, tp->fackets_out, tp->sacked_out,
+		       tp->reordering,
+		       tcp_get_pcount(&tp->fackets_out),
+		       tcp_get_pcount(&tp->sacked_out),
 		       tp->undo_marker ? tp->undo_retrans : 0);
 #endif
 		/* Disable FACK yet. */
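The first hunk replaces the Vegas-only setup with tcp_ca_init(): the active congestion-control flavour is sampled from the sysctls once, at connection setup, and recorded in tp->adv_cong, so later per-ACK code can test the socket's own setting instead of the global knob (the bictcp_cwnd() hunk further down switches from sysctl_tcp_bic to tcp_is_bic(tp) for exactly this reason). The real enum values and predicates live in the tcp.h half of this patch, which is not shown here; the user-space sketch below only illustrates the idea, with all names assumed.

/* Toy model, not kernel code: pin the algorithm choice per connection. */
#include <stdio.h>

enum ca_alg { CA_RENO, CA_WESTWOOD, CA_BIC, CA_VEGAS };   /* assumed names */

static int sysctl_westwood = 0, sysctl_bic = 1, sysctl_vegas = 0;

struct conn { enum ca_alg adv_cong; };

static void ca_init(struct conn *c)
{
	if (sysctl_westwood)
		c->adv_cong = CA_WESTWOOD;
	else if (sysctl_bic)
		c->adv_cong = CA_BIC;
	else if (sysctl_vegas)
		c->adv_cong = CA_VEGAS;
	else
		c->adv_cong = CA_RENO;
}

static int is_bic(const struct conn *c) { return c->adv_cong == CA_BIC; }

int main(void)
{
	struct conn c;

	ca_init(&c);
	sysctl_bic = 0;				/* flipping the sysctl later...      */
	printf("bic? %d\n", is_bic(&c));	/* ...does not affect this socket: 1 */
	return 0;
}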
@@ -958,7 +965,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
 	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
-	int reord = tp->packets_out;
+	int reord = tcp_get_pcount(&tp->packets_out);
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
@@ -972,9 +979,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		tp->mss_cache = tp->mss_cache_std;
 	}
 
-	if (!tp->sacked_out)
-		tp->fackets_out = 0;
-	prior_fackets = tp->fackets_out;
+	if (!tcp_get_pcount(&tp->sacked_out))
+		tcp_set_pcount(&tp->fackets_out, 0);
+	prior_fackets = tcp_get_pcount(&tp->fackets_out);
 
 	for (i=0; iseq, end_seq)) break;
-			fack_count++;
+			fack_count += tcp_skb_pcount(skb);
 
 			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
 				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
@@ -1072,8 +1079,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				 */
 				if (sacked & TCPCB_LOST) {
 					TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
-					tp->lost_out--;
-					tp->retrans_out--;
+					tcp_dec_pcount(&tp->lost_out, skb);
+					tcp_dec_pcount(&tp->retrans_out, skb);
 				}
 			} else {
 				/* New sack for not retransmitted frame,
@@ -1085,16 +1092,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 				if (sacked & TCPCB_LOST) {
 					TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-					tp->lost_out--;
+					tcp_dec_pcount(&tp->lost_out, skb);
 				}
 			}
 
 			TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
 			flag |= FLAG_DATA_SACKED;
-			tp->sacked_out++;
+			tcp_inc_pcount(&tp->sacked_out, skb);
 
-			if (fack_count > tp->fackets_out)
-				tp->fackets_out = fack_count;
+			if (fack_count > tcp_get_pcount(&tp->fackets_out))
+				tcp_set_pcount(&tp->fackets_out, fack_count);
 		} else {
 			if (dup_sack && (sacked&TCPCB_RETRANS))
 				reord = min(fack_count, reord);
@@ -1108,7 +1115,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		if (dup_sack &&
 		    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-			tp->retrans_out--;
+			tcp_dec_pcount(&tp->retrans_out, skb);
 		}
 	}
 }
@@ -1132,12 +1139,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		    (IsFack(tp) ||
 		     !before(lost_retrans,
			     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
-			     tp->mss_cache))) {
+			     tp->mss_cache_std))) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-			tp->retrans_out--;
+			tcp_dec_pcount(&tp->retrans_out, skb);
 
 			if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
-				tp->lost_out++;
+				tcp_inc_pcount(&tp->lost_out, skb);
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				flag |= FLAG_DATA_SACKED;
 				NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
@@ -1146,15 +1153,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		}
 	}
 
-	tp->left_out = tp->sacked_out + tp->lost_out;
+	tcp_set_pcount(&tp->left_out,
+		       (tcp_get_pcount(&tp->sacked_out) +
+			tcp_get_pcount(&tp->lost_out)));
 
-	if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
-		tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0);
+	if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
+	    tp->ca_state != TCP_CA_Loss)
+		tcp_update_reordering(tp,
+				      ((tcp_get_pcount(&tp->fackets_out) + 1) -
+				       reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-	BUG_TRAP((int)tp->sacked_out >= 0);
-	BUG_TRAP((int)tp->lost_out >= 0);
-	BUG_TRAP((int)tp->retrans_out >= 0);
+	BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+	BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+	BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
 	BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
 #endif
 	return flag;
 }
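Most of this patch is mechanical: every open-coded counter update (tp->sacked_out++, tp->lost_out--, ...) now goes through tcp_get_pcount()/tcp_set_pcount()/tcp_inc_pcount()/tcp_dec_pcount(), and loop counters advance by tcp_skb_pcount(skb) instead of 1, because with TSO a single skb on the retransmit queue can stand for several MSS-sized segments. The accessors themselves are defined in the tcp.h part of the patch, not shown here; the sketch below is only a plausible shape for them, assuming the counter type wraps a plain integer and tcp_skb_pcount() reports how many segments an skb covers.

/* Toy model of the pcount helpers (assumed shape, user-space C). */
#include <stdio.h>

typedef struct { unsigned int val; } tcp_pcount_t;

struct toy_skb { unsigned int tso_segs; };	/* segments carried by one skb */

static unsigned int skb_pcount(const struct toy_skb *skb)
{
	return skb->tso_segs;			/* >= 1; > 1 only for TSO frames */
}

static unsigned int pcount_get(const tcp_pcount_t *c) { return c->val; }

static void pcount_inc(tcp_pcount_t *c, const struct toy_skb *skb)
{
	c->val += skb_pcount(skb);
}

static void pcount_dec(tcp_pcount_t *c, const struct toy_skb *skb)
{
	c->val -= skb_pcount(skb);
}

int main(void)
{
	tcp_pcount_t sacked_out = { 0 };
	struct toy_skb tso = { .tso_segs = 4 };	/* one 4-segment TSO frame */

	pcount_inc(&sacked_out, &tso);		/* SACK covers the whole frame */
	printf("sacked_out = %u\n", pcount_get(&sacked_out));	/* 4, not 1 */
	pcount_dec(&sacked_out, &tso);
	printf("sacked_out = %u\n", pcount_get(&sacked_out));	/* back to 0 */
	return 0;
}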
@@ -1184,7 +1196,7 @@ void tcp_enter_frto(struct sock *sk)
 	 * If something was really lost, it is eventually caught up
 	 * in tcp_enter_frto_loss.
 	 */
-	tp->retrans_out = 0;
+	tcp_set_pcount(&tp->retrans_out, 0);
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
@@ -1207,26 +1219,26 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	struct sk_buff *skb;
 	int cnt = 0;
 
-	tp->sacked_out = 0;
-	tp->lost_out = 0;
-	tp->fackets_out = 0;
+	tcp_set_pcount(&tp->sacked_out, 0);
+	tcp_set_pcount(&tp->lost_out, 0);
+	tcp_set_pcount(&tp->fackets_out, 0);
 
 	sk_stream_for_retrans_queue(skb, sk) {
-		cnt++;
+		cnt += tcp_skb_pcount(skb);
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 
 			/* Do not mark those segments lost that were
 			 * forward transmitted after RTO
 			 */
-			if(!after(TCP_SKB_CB(skb)->end_seq,
+			if (!after(TCP_SKB_CB(skb)->end_seq,
 				   tp->frto_highmark)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out++;
+				tcp_inc_pcount(&tp->lost_out, skb);
 			}
 		} else {
-			tp->sacked_out++;
-			tp->fackets_out = cnt;
+			tcp_inc_pcount(&tp->sacked_out, skb);
+			tcp_set_pcount(&tp->fackets_out, cnt);
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1248,12 +1260,12 @@ static void tcp_enter_frto_loss(struct sock *sk)
 
 void tcp_clear_retrans(struct tcp_opt *tp)
 {
-	tp->left_out = 0;
-	tp->retrans_out = 0;
+	tcp_set_pcount(&tp->left_out, 0);
+	tcp_set_pcount(&tp->retrans_out, 0);
 
-	tp->fackets_out = 0;
-	tp->sacked_out = 0;
-	tp->lost_out = 0;
+	tcp_set_pcount(&tp->fackets_out, 0);
+	tcp_set_pcount(&tp->sacked_out, 0);
+	tcp_set_pcount(&tp->lost_out, 0);
 
 	tp->undo_marker = 0;
 	tp->undo_retrans = 0;
@@ -1287,17 +1299,17 @@ void tcp_enter_loss(struct sock *sk, int how)
 		tp->undo_marker = tp->snd_una;
 
 	sk_stream_for_retrans_queue(skb, sk) {
-		cnt++;
+		cnt += tcp_skb_pcount(skb);
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			tp->undo_marker = 0;
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out++;
+			tcp_inc_pcount(&tp->lost_out, skb);
 		} else {
-			tp->sacked_out++;
-			tp->fackets_out = cnt;
+			tcp_inc_pcount(&tp->sacked_out, skb);
+			tcp_set_pcount(&tp->fackets_out, cnt);
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1334,7 +1346,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
 
 static inline int tcp_fackets_out(struct tcp_opt *tp)
 {
-	return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+	return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 :
+		tcp_get_pcount(&tp->fackets_out);
 }
 
 static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
@@ -1344,7 +1357,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
 
 static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
 {
-	return tp->packets_out &&
+	return tcp_get_pcount(&tp->packets_out) &&
 	       tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
 }
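tcp_sync_left_out(), called at the end of the loops above, keeps the derived counter in step with the two it is built from: left_out is sacked_out plus lost_out (that relation is visible in the tcp_sacktag_write_queue() hunk earlier), and the usual in-flight estimate checked by the BUG_TRAPs is packets_out - left_out + retrans_out. A self-contained illustration of that arithmetic, plain C rather than the kernel helpers themselves:

/* Worked example of the scoreboard arithmetic (user-space C). */
#include <stdio.h>

int main(void)
{
	unsigned int packets_out = 10;	/* segments sent, not yet cumulatively ACKed */
	unsigned int sacked_out  = 3;	/* segments reported arrived via SACK        */
	unsigned int lost_out    = 2;	/* segments the scoreboard considers lost    */
	unsigned int retrans_out = 1;	/* retransmissions currently in the network  */

	unsigned int left_out  = sacked_out + lost_out;	/* what tcp_sync_left_out() maintains */
	unsigned int in_flight = packets_out - left_out + retrans_out;

	printf("left_out=%u in_flight=%u\n", left_out, in_flight);	/* 5 and 6 */
	return 0;
}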
@@ -1444,8 +1457,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
 static int
 tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
 {
+	__u32 packets_out;
+
 	/* Trick#1: The loss is proven. */
-	if (tp->lost_out)
+	if (tcp_get_pcount(&tp->lost_out))
 		return 1;
 
 	/* Not-A-Trick#2 : Classic rule... */
@@ -1461,8 +1476,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
 	/* Trick#4: It is still not OK... But will it be useful to delay
 	 * recovery more?
 	 */
-	if (tp->packets_out <= tp->reordering &&
-	    tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) &&
+	packets_out = tcp_get_pcount(&tp->packets_out);
+	if (packets_out <= tp->reordering &&
+	    tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
 	    !tcp_may_send_now(sk, tp)) {
 		/* We have nothing to send. This connection is limited
 		 * either by receiver window or by application.
@@ -1481,12 +1497,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
 {
 	u32 holes;
 
-	holes = max(tp->lost_out, 1U);
-	holes = min(holes, tp->packets_out);
+	holes = max(tcp_get_pcount(&tp->lost_out), 1U);
+	holes = min(holes, tcp_get_pcount(&tp->packets_out));
 
-	if (tp->sacked_out + holes > tp->packets_out) {
-		tp->sacked_out = tp->packets_out - holes;
-		tcp_update_reordering(tp, tp->packets_out+addend, 0);
+	if ((tcp_get_pcount(&tp->sacked_out) + holes) >
+	    tcp_get_pcount(&tp->packets_out)) {
+		tcp_set_pcount(&tp->sacked_out,
+			       (tcp_get_pcount(&tp->packets_out) - holes));
+		tcp_update_reordering(tp,
+				      tcp_get_pcount(&tp->packets_out)+addend,
+				      0);
 	}
 }
@@ -1494,7 +1514,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
 
 static void tcp_add_reno_sack(struct tcp_opt *tp)
 {
-	++tp->sacked_out;
+	tcp_inc_pcount_explicit(&tp->sacked_out, 1);
 	tcp_check_reno_reordering(tp, 0);
 	tcp_sync_left_out(tp);
 }
@@ -1505,10 +1525,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
 {
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
-		if (acked-1 >= tp->sacked_out)
-			tp->sacked_out = 0;
+		if (acked-1 >= tcp_get_pcount(&tp->sacked_out))
+			tcp_set_pcount(&tp->sacked_out, 0);
 		else
-			tp->sacked_out -= acked-1;
+			tcp_dec_pcount_explicit(&tp->sacked_out, acked-1);
 	}
 	tcp_check_reno_reordering(tp, acked);
 	tcp_sync_left_out(tp);
@@ -1516,8 +1536,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
 
 static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
 {
-	tp->sacked_out = 0;
-	tp->left_out = tp->lost_out;
+	tcp_set_pcount(&tp->sacked_out, 0);
+	tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out));
 }
 
 /* Mark head of queue up as lost. */
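Without SACK, Reno fakes the scoreboard: each duplicate ACK bumps sacked_out by exactly one (tcp_add_reno_sack), a partial ACK "eats" acked-1 of those dupacks (tcp_remove_reno_sacks), and tcp_check_reno_reordering() caps sacked_out so that sacked_out + holes never exceeds packets_out. A compact user-space rendering of that invariant, with reordering updates and tcp_sync_left_out() deliberately left out:

/* Toy model of the Reno dupack accounting (user-space C). */
#include <stdio.h>

static unsigned int umin(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int umax(unsigned int a, unsigned int b) { return a > b ? a : b; }

struct reno { unsigned int packets_out, sacked_out, lost_out; };

static void check_reno_reordering(struct reno *t)
{
	unsigned int holes = umin(umax(t->lost_out, 1), t->packets_out);

	if (t->sacked_out + holes > t->packets_out)
		t->sacked_out = t->packets_out - holes;
}

static void add_reno_sack(struct reno *t)
{
	t->sacked_out++;			/* one dupack == one "SACKed" segment */
	check_reno_reordering(t);
}

static void remove_reno_sacks(struct reno *t, unsigned int acked)
{
	if (acked > 0) {
		if (acked - 1 >= t->sacked_out)	/* one ACK fills the hole...  */
			t->sacked_out = 0;
		else				/* ...the rest eat dupacks    */
			t->sacked_out -= acked - 1;
	}
	check_reno_reordering(t);
}

int main(void)
{
	struct reno t = { .packets_out = 8, .sacked_out = 0, .lost_out = 1 };

	for (int i = 0; i < 3; i++)
		add_reno_sack(&t);		/* three dupacks arrive         */
	remove_reno_sacks(&t, 4);		/* a partial ACK covers 4 segs  */
	printf("sacked_out=%u\n", t.sacked_out);	/* 0 */
	return 0;
}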
@@ -1527,14 +1547,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
 	struct sk_buff *skb;
 	int cnt = packets;
 
-	BUG_TRAP(cnt <= tp->packets_out);
+	BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out));
 
 	sk_stream_for_retrans_queue(skb, sk) {
-		if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+		cnt -= tcp_skb_pcount(skb);
+		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-			tp->lost_out++;
+			tcp_inc_pcount(&tp->lost_out, skb);
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1545,7 +1566,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
 static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
 {
 	if (IsFack(tp)) {
-		int lost = tp->fackets_out - tp->reordering;
+		int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
 		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
@@ -1565,7 +1586,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
 			if (tcp_skb_timedout(tp, skb) &&
 			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out++;
+				tcp_inc_pcount(&tp->lost_out, skb);
 			}
 		}
 		tcp_sync_left_out(tp);
@@ -1630,8 +1651,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
 	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
 	       msg,
 	       NIPQUAD(inet->daddr), ntohs(inet->dport),
-	       tp->snd_cwnd, tp->left_out,
-	       tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
+	       tp->snd_cwnd, tcp_get_pcount(&tp->left_out),
+	       tp->snd_ssthresh, tp->prior_ssthresh,
+	       tcp_get_pcount(&tp->packets_out));
 }
 #else
 #define DBGUNDO(x...) do { } while (0)
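In tcp_mark_head_lost() the budget check changes from --cnt < 0 to cnt -= tcp_skb_pcount(skb), so a single TSO frame can consume several units of the "packets" budget at once while still being tagged lost as a whole. With a budget of 3 and a queue of one 2-segment frame followed by one 4-segment frame, only the first frame is marked; the walk below is just that arithmetic in isolation:

/* Toy walk of the changed budget check (user-space C). */
#include <stdio.h>

int main(void)
{
	int budget = 3;				/* "packets" argument           */
	int frame_segs[] = { 2, 4 };		/* pcount of each queued frame  */
	int marked = 0;

	for (int i = 0; i < 2; i++) {
		budget -= frame_segs[i];	/* cnt -= tcp_skb_pcount(skb)   */
		if (budget < 0)
			break;			/* stop before over-marking     */
		marked++;
	}
	printf("frames marked lost: %d\n", marked);	/* 1 */
	return 0;
}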
@@ -1701,13 +1723,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
 static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
 {
 	/* Partial ACK arrived. Force Hoe's retransmit. */
-	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+	int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering;
 
 	if (tcp_may_undo(tp)) {
 		/* Plain luck! Hole if filled with delayed
 		 * packet, rather than with a retransmit.
 		 */
-		if (tp->retrans_out == 0)
+		if (tcp_get_pcount(&tp->retrans_out) == 0)
 			tp->retrans_stamp = 0;
 
 		tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
@@ -1734,8 +1756,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 		DBGUNDO(sk, tp, "partial loss");
-		tp->lost_out = 0;
-		tp->left_out = tp->sacked_out;
+		tcp_set_pcount(&tp->lost_out, 0);
+		tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
 		tcp_undo_cwr(tp, 1);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
 		tp->retransmits = 0;
@@ -1758,9 +1780,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
 
 static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
 {
-	tp->left_out = tp->sacked_out;
+	tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
 
-	if (tp->retrans_out == 0)
+	if (tcp_get_pcount(&tp->retrans_out) == 0)
 		tp->retrans_stamp = 0;
 
 	if (flag&FLAG_ECE)
@@ -1769,8 +1791,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
 	if (tp->ca_state != TCP_CA_CWR) {
 		int state = TCP_CA_Open;
 
-		if (tp->left_out ||
-		    tp->retrans_out ||
+		if (tcp_get_pcount(&tp->left_out) ||
+		    tcp_get_pcount(&tp->retrans_out) ||
 		    tp->undo_marker)
 			state = TCP_CA_Disorder;
@@ -1804,11 +1826,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	/* Some technical things:
 	 * 1. Reno does not count dupacks (sacked_out) automatically. */
-	if (!tp->packets_out)
-		tp->sacked_out = 0;
+	if (!tcp_get_pcount(&tp->packets_out))
+		tcp_set_pcount(&tp->sacked_out, 0);
 	/* 2. SACK counts snd_fack in packets inaccurately. */
-	if (tp->sacked_out == 0)
-		tp->fackets_out = 0;
+	if (tcp_get_pcount(&tp->sacked_out) == 0)
+		tcp_set_pcount(&tp->fackets_out, 0);
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -1816,15 +1838,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		tp->prior_ssthresh = 0;
 
 	/* B. In all the states check for reneging SACKs. */
-	if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+	if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp))
 		return;
 
 	/* C. Process data loss notification, provided it is valid. */
 	if ((flag&FLAG_DATA_LOST) &&
 	    before(tp->snd_una, tp->high_seq) &&
 	    tp->ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+	    tcp_get_pcount(&tp->fackets_out) > tp->reordering) {
+		tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
@@ -1835,7 +1857,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	 * when high_seq is ACKed. */
 	if (tp->ca_state == TCP_CA_Open) {
 		if (!sysctl_tcp_frto)
-			BUG_TRAP(tp->retrans_out == 0);
+			BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (tp->ca_state) {
@@ -1882,7 +1904,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			if (IsReno(tp) && is_dupack)
 				tcp_add_reno_sack(tp);
 		} else {
-			int acked = prior_packets - tp->packets_out;
+			int acked = prior_packets -
+				    tcp_get_pcount(&tp->packets_out);
 			if (IsReno(tp))
 				tcp_remove_reno_sacks(sk, tp, acked);
 			is_dupack = tcp_try_undo_partial(sk, tp, acked);
@@ -1925,7 +1948,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		tp->high_seq = tp->snd_nxt;
 		tp->prior_ssthresh = 0;
 		tp->undo_marker = tp->snd_una;
-		tp->undo_retrans = tp->retrans_out;
+		tp->undo_retrans = tcp_get_pcount(&tp->retrans_out);
 
 		if (tp->ca_state < TCP_CA_CWR) {
 			if (!(flag&FLAG_ECE))
@@ -2019,7 +2042,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
 static inline __u32 bictcp_cwnd(struct tcp_opt *tp)
 {
 	/* orignal Reno behaviour */
-	if (!sysctl_tcp_bic)
+	if (!tcp_is_bic(tp))
 		return tp->snd_cwnd;
 
 	if (tp->bictcp.last_cwnd == tp->snd_cwnd &&
@@ -2154,7 +2177,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
 	 * is the cwnd during the previous RTT.
 	 */
 	old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
-		tp->mss_cache;
+		tp->mss_cache_std;
 	old_snd_cwnd = tp->vegas.beg_snd_cwnd;
 
 	/* Save the extent of the current window so we can use this
@@ -2325,13 +2348,89 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
 
 static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
 {
-	if (tp->packets_out==0) {
+	if (!tcp_get_pcount(&tp->packets_out)) {
 		tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
 	} else {
 		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 	}
 }
 
+/* There is one downside to this scheme.  Although we keep the
+ * ACK clock ticking, adjusting packet counters and advancing
+ * congestion window, we do not liberate socket send buffer
+ * space.
+ *
+ * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
+ * then making a write space wakeup callback is a possible
+ * future enhancement.  WARNING: it is not trivial to make.
+ */
+static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
+			 __u32 now, __s32 *seq_rtt)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+	__u32 mss = tcp_skb_mss(skb);
+	__u32 snd_una = tp->snd_una;
+	__u32 orig_seq, seq;
+	__u32 packets_acked = 0;
+	int acked = 0;
+
+	/* If we get here, the whole TSO packet has not been
+	 * acked.
+	 */
+	BUG_ON(!after(scb->end_seq, snd_una));
+
+	seq = orig_seq = scb->seq;
+	while (!after(seq + mss, snd_una)) {
+		packets_acked++;
+		seq += mss;
+	}
+
+	if (tcp_trim_head(sk, skb, (seq - orig_seq)))
+		return 0;
+
+	if (packets_acked) {
+		__u8 sacked = scb->sacked;
+
+		acked |= FLAG_DATA_ACKED;
+		if (sacked) {
+			if (sacked & TCPCB_RETRANS) {
+				if (sacked & TCPCB_SACKED_RETRANS)
+					tcp_dec_pcount_explicit(&tp->retrans_out,
+								packets_acked);
+				acked |= FLAG_RETRANS_DATA_ACKED;
+				*seq_rtt = -1;
+			} else if (*seq_rtt < 0)
+				*seq_rtt = now - scb->when;
+			if (sacked & TCPCB_SACKED_ACKED)
+				tcp_dec_pcount_explicit(&tp->sacked_out,
+							packets_acked);
+			if (sacked & TCPCB_LOST)
+				tcp_dec_pcount_explicit(&tp->lost_out,
+							packets_acked);
+			if (sacked & TCPCB_URG) {
+				if (tp->urg_mode &&
+				    !before(seq, tp->snd_up))
+					tp->urg_mode = 0;
+			}
+		} else if (*seq_rtt < 0)
+			*seq_rtt = now - scb->when;
+
+		if (tcp_get_pcount(&tp->fackets_out)) {
+			__u32 dval = min(tcp_get_pcount(&tp->fackets_out),
+					 packets_acked);
+			tcp_dec_pcount_explicit(&tp->fackets_out, dval);
+		}
+		tcp_dec_pcount_explicit(&tp->packets_out, packets_acked);
+
+		BUG_ON(tcp_skb_pcount(skb) == 0);
+		BUG_ON(!before(scb->seq, scb->end_seq));
+	}
+
+	return acked;
+}
+
+
 /* Remove acknowledged frames from the retransmission queue. */
 static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
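tcp_tso_acked(), added above, handles an ACK that lands in the middle of a TSO frame: it counts how many full MSS-sized pieces of the frame fall at or below snd_una, trims exactly that many bytes off the head of the skb with tcp_trim_head() (defined elsewhere, not in this diff), and then retires the same number of units from packets_out and friends with the *_explicit variants. The arithmetic in isolation, as a user-space sketch with made-up numbers:

/* Worked example of the partial-ACK arithmetic in tcp_tso_acked(). */
#include <stdio.h>

int main(void)
{
	unsigned int mss = 1448;
	unsigned int seq = 10000;			/* start of a 5-segment TSO frame */
	unsigned int snd_una = 10000 + 3 * 1448 + 100;	/* ACK covers 3 full segments     */
	unsigned int packets_acked = 0, cursor = seq;

	while (cursor + mss <= snd_una) {	/* mirrors: while (!after(seq + mss, snd_una)) */
		packets_acked++;
		cursor += mss;
	}

	/* Trim (cursor - seq) bytes off the head, then decrement the packet
	 * counters by packets_acked (3 here); the 2-segment tail stays queued. */
	printf("packets_acked=%u trim=%u bytes\n", packets_acked, cursor - seq);
	return 0;
}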
@@ -2341,7 +2440,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	int acked = 0;
 	__s32 seq_rtt = -1;
 
-	while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) {
+	while ((skb = skb_peek(&sk->sk_write_queue)) &&
+	       skb != sk->sk_send_head) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
@@ -2349,8 +2449,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		 * discard it as it's confirmed to have arrived at
 		 * the other end.
 		 */
-		if (after(scb->end_seq, tp->snd_una))
+		if (after(scb->end_seq, tp->snd_una)) {
+			if (tcp_skb_pcount(skb) > 1)
+				acked |= tcp_tso_acked(sk, skb,
+						       now, &seq_rtt);
 			break;
+		}
 
 		/* Initial outgoing SYN's get put onto the write_queue
 		 * just like anything else we transmit.  It is not
 		 * true data, and if we misinform our callers that
 		 * this ACK acks real data, we will erroneously exit
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
		 */
-		if(!(scb->flags & TCPCB_FLAG_SYN)) {
+		if (!(scb->flags & TCPCB_FLAG_SYN)) {
 			acked |= FLAG_DATA_ACKED;
 		} else {
 			acked |= FLAG_SYN_ACKED;
 		}
@@ -2367,27 +2471,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 		if (sacked) {
-			if(sacked & TCPCB_RETRANS) {
+			if (sacked & TCPCB_RETRANS) {
 				if(sacked & TCPCB_SACKED_RETRANS)
-					tp->retrans_out--;
+					tcp_dec_pcount(&tp->retrans_out, skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
 			} else if (seq_rtt < 0)
 				seq_rtt = now - scb->when;
-			if(sacked & TCPCB_SACKED_ACKED)
-				tp->sacked_out--;
-			if(sacked & TCPCB_LOST)
-				tp->lost_out--;
-			if(sacked & TCPCB_URG) {
+			if (sacked & TCPCB_SACKED_ACKED)
+				tcp_dec_pcount(&tp->sacked_out, skb);
+			if (sacked & TCPCB_LOST)
+				tcp_dec_pcount(&tp->lost_out, skb);
+			if (sacked & TCPCB_URG) {
 				if (tp->urg_mode &&
 				    !before(scb->end_seq, tp->snd_up))
 					tp->urg_mode = 0;
 			}
 		} else if (seq_rtt < 0)
 			seq_rtt = now - scb->when;
-		if (tp->fackets_out)
-			tp->fackets_out--;
-		tp->packets_out--;
+		tcp_dec_pcount_approx(&tp->fackets_out, skb);
+		tcp_packets_out_dec(tp, skb);
 		__skb_unlink(skb, skb->list);
 		sk_stream_free_skb(sk, skb);
 	}
@@ -2398,24 +2501,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	}
 
 #if FASTRETRANS_DEBUG > 0
-	BUG_TRAP((int)tp->sacked_out >= 0);
-	BUG_TRAP((int)tp->lost_out >= 0);
-	BUG_TRAP((int)tp->retrans_out >= 0);
-	if (!tp->packets_out && tp->sack_ok) {
-		if (tp->lost_out) {
-			printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out,
-			       tp->ca_state);
-			tp->lost_out = 0;
+	BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+	BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+	BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
+	if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
+		if (tcp_get_pcount(&tp->lost_out)) {
+			printk(KERN_DEBUG "Leak l=%u %d\n",
+			       tcp_get_pcount(&tp->lost_out),
+			       tp->ca_state);
+			tcp_set_pcount(&tp->lost_out, 0);
 		}
-		if (tp->sacked_out) {
-			printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out,
-			       tp->ca_state);
-			tp->sacked_out = 0;
+		if (tcp_get_pcount(&tp->sacked_out)) {
+			printk(KERN_DEBUG "Leak s=%u %d\n",
+			       tcp_get_pcount(&tp->sacked_out),
+			       tp->ca_state);
+			tcp_set_pcount(&tp->sacked_out, 0);
 		}
-		if (tp->retrans_out) {
-			printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out,
-			       tp->ca_state);
-			tp->retrans_out = 0;
+		if (tcp_get_pcount(&tp->retrans_out)) {
+			printk(KERN_DEBUG "Leak r=%u %d\n",
+			       tcp_get_pcount(&tp->retrans_out),
+			       tp->ca_state);
+			tcp_set_pcount(&tp->retrans_out, 0);
 		}
 	}
 #endif
@@ -2594,18 +2700,16 @@ static void westwood_filter(struct sock *sk, __u32 delta)
  * WESTWOOD_RTT_MIN minimum bound since we could be on a LAN!
  */
-static inline __u32 westwood_update_rttmin(struct sock *sk)
+static inline __u32 westwood_update_rttmin(const struct sock *sk)
 {
-	struct tcp_opt *tp = tcp_sk(sk);
+	const struct tcp_opt *tp = tcp_sk(sk);
 	__u32 rttmin = tp->westwood.rtt_min;
 
-	if (tp->westwood.rtt == 0)
-		return(rttmin);
-
-	if (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin)
+	if (tp->westwood.rtt != 0 &&
+	    (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin))
 		rttmin = tp->westwood.rtt;
 
-	return(rttmin);
+	return rttmin;
 }
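The Westwood hunks below constify the read-only helpers and tidy __westwood_update_window(). The scheme they serve, as far as it can be read from this file: each ACK contributes westwood_acked() bytes (the advance of snd_una) to a per-RTT sampling window, and when the window closes westwood_filter() turns the accumulated bytes over the elapsed time into a bandwidth sample. The accumulation into westwood.bk happens in code outside this excerpt, so the sketch below is only a hedged, user-space model of the idea:

/* Toy model of Westwood's per-RTT bandwidth sampling (assumed shape). */
#include <stdio.h>

struct ww {
	unsigned int snd_una;	/* last cumulatively ACKed byte we sampled     */
	unsigned int bk;	/* bytes ACKed in the current sampling window  */
	unsigned int win_start;	/* timestamp (ms) the window was opened        */
};

static unsigned int ww_acked(struct ww *w, unsigned int snd_una_now)
{
	unsigned int bytes = snd_una_now - w->snd_una;	/* like westwood_acked() */
	w->snd_una = snd_una_now;
	return bytes;
}

static void ww_update_window(struct ww *w, unsigned int now, unsigned int rtt)
{
	unsigned int delta = now - w->win_start;

	if (delta && delta >= rtt) {		/* window spans at least one RTT */
		printf("bandwidth sample: %u bytes / %u ms\n", w->bk, delta);
		w->bk = 0;			/* like __westwood_update_window() */
		w->win_start = now;
	}
}

int main(void)
{
	struct ww w = { .snd_una = 1000, .bk = 0, .win_start = 0 };

	w.bk += ww_acked(&w, 4000);	/* ACK advances una by 3000 bytes */
	w.bk += ww_acked(&w, 9000);	/* ...and by 5000 more            */
	ww_update_window(&w, 120, 100);	/* one RTT (100 ms) has elapsed   */
	return 0;
}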
 /*
@@ -2613,11 +2717,11 @@ static inline __u32 westwood_update_rttmin(struct sock *sk)
  * Evaluate increases for dk.
  */
-static inline __u32 westwood_acked(struct sock *sk)
+static inline __u32 westwood_acked(const struct sock *sk)
 {
-	struct tcp_opt *tp = tcp_sk(sk);
+	const struct tcp_opt *tp = tcp_sk(sk);
 
-	return ((tp->snd_una) - (tp->westwood.snd_una));
+	return tp->snd_una - tp->westwood.snd_una;
 }
 
 /*
@@ -2629,9 +2733,9 @@ static inline __u32 westwood_acked(struct sock *sk)
  * window, 1 if the sample has to be considered in the next window.
  */
-static int westwood_new_window(struct sock *sk)
+static int westwood_new_window(const struct sock *sk)
 {
-	struct tcp_opt *tp = tcp_sk(sk);
+	const struct tcp_opt *tp = tcp_sk(sk);
 	__u32 left_bound;
 	__u32 rtt;
 	int ret = 0;
@@ -2665,14 +2769,13 @@ static void __westwood_update_window(struct sock *sk, __u32 now)
 	struct tcp_opt *tp = tcp_sk(sk);
 	__u32 delta = now - tp->westwood.rtt_win_sx;
 
-	if (!delta)
-		return;
-
-	if (tp->westwood.rtt)
-		westwood_filter(sk, delta);
+	if (delta) {
+		if (tp->westwood.rtt)
+			westwood_filter(sk, delta);
 
-	tp->westwood.bk = 0;
-	tp->westwood.rtt_win_sx = tcp_time_stamp;
+		tp->westwood.bk = 0;
+		tp->westwood.rtt_win_sx = tcp_time_stamp;
+	}
 }
@@ -2710,19 +2813,19 @@ static void westwood_dupack_update(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 
-	tp->westwood.accounted += tp->mss_cache;
-	tp->westwood.cumul_ack = tp->mss_cache;
+	tp->westwood.accounted += tp->mss_cache_std;
+	tp->westwood.cumul_ack = tp->mss_cache_std;
 }
 
 static inline int westwood_may_change_cumul(struct tcp_opt *tp)
 {
-	return ((tp->westwood.cumul_ack) > tp->mss_cache);
+	return (tp->westwood.cumul_ack > tp->mss_cache_std);
 }
 
 static inline void westwood_partial_update(struct tcp_opt *tp)
 {
 	tp->westwood.accounted -= tp->westwood.cumul_ack;
-	tp->westwood.cumul_ack = tp->mss_cache;
+	tp->westwood.cumul_ack = tp->mss_cache_std;
 }
 
 static inline void westwood_complete_update(struct tcp_opt *tp)
@@ -2737,7 +2840,7 @@ static inline void westwood_complete_update(struct tcp_opt *tp)
  * delayed or partial acks.
  */
-static __u32 westwood_acked_count(struct sock *sk)
+static inline __u32 westwood_acked_count(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
@@ -2751,7 +2854,7 @@ static __u32 westwood_acked_count(struct sock *sk)
 
 	if (westwood_may_change_cumul(tp)) {
 		/* Partial or delayed ack */
-		if ((tp->westwood.accounted) >= (tp->westwood.cumul_ack))
+		if (tp->westwood.accounted >= tp->westwood.cumul_ack)
 			westwood_partial_update(tp);
 		else
 			westwood_complete_update(tp);
@@ -2833,7 +2936,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	 */
 	sk->sk_err_soft = 0;
 	tp->rcv_tstamp = tcp_time_stamp;
-	prior_packets = tp->packets_out;
+	prior_packets = tcp_get_pcount(&tp->packets_out);
 	if (!prior_packets)
 		goto no_queue;
@@ -3855,11 +3958,11 @@ static void tcp_new_space(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 
-	if (tp->packets_out < tp->snd_cwnd &&
+	if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd &&
 	    !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
 	    !tcp_memory_pressure &&
 	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
-		int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
+		int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
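A pattern running through the whole diff is the switch from tp->mss_cache to tp->mss_cache_std wherever the code wants the per-segment MSS: initial cwnd sizing, Vegas' byte-to-packet window conversion, the Westwood per-ACK accounting, and the send-buffer sizing above. The likely reason (the tcp.h/tcp_output.c side of the patch, not shown here, is where the distinction is made) is that with TSO mss_cache can be inflated to describe a multi-segment "super-packet", while mss_cache_std keeps the ordinary MSS; any conversion from bytes to packets must use the latter or the result collapses, as the toy numbers below show.

/* Why byte-to-packet conversions need the per-segment MSS (toy numbers). */
#include <stdio.h>

int main(void)
{
	unsigned int window_bytes = 43440;	/* e.g. beg_snd_nxt - beg_snd_una   */
	unsigned int mss_std      = 1448;	/* ordinary per-segment MSS          */
	unsigned int mss_super    = 8 * 1448;	/* hypothetical inflated TSO size    */

	printf("packets (mss_cache_std): %u\n", window_bytes / mss_std);	/* 30 */
	printf("packets (inflated mss):  %u\n", window_bytes / mss_super);	/*  3 */
	return 0;
}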