X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_output.c;h=39214126cf9efc0af485fd836a58e5ffd00c35f8;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=68ab99fc0370190e9a3a3aa4fa95287209e69710;hpb=a2c21200f1c81b08cb55e417b68150bba439b646;p=linux-2.6.git

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 68ab99fc0..39214126c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -45,6 +45,12 @@
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
+/* This limits the percentage of the congestion window which we
+ * will allow a single TSO frame to consume. Building TSO frames
+ * which are too large can cause TCP streams to be bursty.
+ */
+int sysctl_tcp_tso_win_divisor = 8;
+
 static __inline__
 void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 {
@@ -52,8 +58,7 @@ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
 		sk->sk_send_head = NULL;
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	if (tp->packets_out++ == 0)
-		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	tcp_packets_out_inc(sk, tp, skb);
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -123,7 +128,8 @@ static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *s
 {
 	u32 now = tcp_time_stamp;
 
-	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
+	if (!tcp_get_pcount(&tp->packets_out) &&
+	    (s32)(now - tp->lsndtime) > tp->rto)
 		tcp_cwnd_restart(tp, __sk_dst_get(sk));
 
 	tp->lsndtime = now;
@@ -143,6 +149,65 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk)
 	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
 }
 
+/* Determine a window scaling and initial window to offer.
+ * Based on the assumption that the given amount of space
+ * will be offered. Store the results in the tp structure.
+ * NOTE: for smooth operation initial space offering should
+ * be a multiple of mss if possible. We assume here that mss >= 1.
+ * This MUST be enforced by all callers.
+ */
+void tcp_select_initial_window(int __space, __u32 mss,
+			       __u32 *rcv_wnd, __u32 *window_clamp,
+			       int wscale_ok, __u8 *rcv_wscale)
+{
+	unsigned int space = (__space < 0 ? 0 : __space);
+
+	/* If no clamp set the clamp to the max possible scaled window */
+	if (*window_clamp == 0)
+		(*window_clamp) = (65535 << 14);
+	space = min(*window_clamp, space);
+
+	/* Quantize space offering to a multiple of mss if possible. */
+	if (space > mss)
+		space = (space / mss) * mss;
+
+	/* NOTE: offering an initial window larger than 32767
+	 * will break some buggy TCP stacks. We try to be nice.
+	 * If we are not window scaling, then this truncates
+	 * our initial window offering to 32k. There should also
+	 * be a sysctl option to stop being nice.
+	 */
+	(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
+	(*rcv_wscale) = 0;
+	if (wscale_ok) {
+		/* Set window scaling on max possible window
+		 * See RFC1323 for an explanation of the limit to 14
+		 */
+		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+		while (space > 65535 && (*rcv_wscale) < 14) {
+			space >>= 1;
+			(*rcv_wscale)++;
+		}
+	}
+
+	/* Set initial window to value enough for senders,
+	 * following RFC2414. Senders, not following this RFC,
+	 * will be satisfied with 2.
+	 */
+	if (mss > (1<<*rcv_wscale)) {
+		int init_cwnd = 4;
+		if (mss > 1460*3)
+			init_cwnd = 2;
+		else if (mss > 1460)
+			init_cwnd = 3;
+		if (*rcv_wnd > init_cwnd*mss)
+			*rcv_wnd = init_cwnd*mss;
+	}
+
+	/* Set the clamp no higher than max representable value */
+	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
+}
+
 /* Chose a new window to advertise, update state in tcp_opt for the
  * socket, and return result with RFC1323 scaling applied. The return
  * value can be stuffed directly into th->window for an outgoing
@@ -198,9 +263,9 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 {
-	if(skb != NULL) {
+	if (skb != NULL) {
 		struct inet_opt *inet = inet_sk(sk);
 		struct tcp_opt *tp = tcp_sk(sk);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -209,6 +274,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		int sysctl_flags;
 		int err;
 
+		BUG_ON(!tcp_skb_pcount(skb));
+
 #define SYSCTL_FLAG_TSTAMPS 0x1
 #define SYSCTL_FLAG_WSCALE 0x2
 #define SYSCTL_FLAG_SACK 0x4
@@ -355,13 +422,30 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
 		if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
 			sk->sk_send_head = NULL;
 			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-			if (tp->packets_out++ == 0)
-				tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+			tcp_packets_out_inc(sk, tp, skb);
 			return;
 		}
 	}
 }
 
+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_std)
+{
+	if (skb->len <= mss_std) {
+		/* Avoid the costly divide in the normal
+		 * non-TSO case.
+		 */
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
+	} else {
+		unsigned int factor;
+
+		factor = skb->len + (mss_std - 1);
+		factor /= mss_std;
+		skb_shinfo(skb)->tso_segs = factor;
+		skb_shinfo(skb)->tso_size = mss_std;
+	}
+}
+
 /* Function to create two new TCP segments. Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list. This won't be called frequently, I hope.
@@ -394,11 +478,9 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
-	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out++;
-		tp->left_out++;
-	}
+	TCP_SKB_CB(buff)->sacked =
+		(TCP_SKB_CB(skb)->sacked &
+		 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -421,6 +503,25 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 	 */
 	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
 
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+		tcp_dec_pcount(&tp->lost_out, skb);
+		tcp_dec_pcount(&tp->left_out, skb);
+	}
+
+	/* Fix up tso_factor for both original and new SKB. */
+	tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+	tcp_set_skb_tso_segs(buff, tp->mss_cache_std);
+
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+		tcp_inc_pcount(&tp->lost_out, skb);
+		tcp_inc_pcount(&tp->left_out, skb);
+	}
+
+	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
+		tcp_inc_pcount(&tp->lost_out, buff);
+		tcp_inc_pcount(&tp->left_out, buff);
+	}
+
 	/* Link BUFF into the send queue. */
 	__skb_append(skb, buff);
 
@@ -431,7 +532,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
  * eventually). The difference is that pulled data not copied, but
  * immediately discarded.
 */
-unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
+static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len)
 {
 	int i, k, eat;
@@ -459,8 +560,10 @@ unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
 	return skb->tail;
 }
 
-static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
+int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
+	struct tcp_opt *tp = tcp_sk(sk);
+
 	if (skb_cloned(skb) &&
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		return -ENOMEM;
@@ -474,6 +577,17 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	TCP_SKB_CB(skb)->seq += len;
 	skb->ip_summed = CHECKSUM_HW;
+
+	skb->truesize -= len;
+	sk->sk_queue_shrunk = 1;
+	sk->sk_wmem_queued -= len;
+	sk->sk_forward_alloc += len;
+
+	/* Any change of skb->len requires recalculation of tso
+	 * factor and mss.
+	 */
+	tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+
 	return 0;
 }
@@ -500,7 +614,7 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    this function. --ANK (980731)
 */
 
-int tcp_sync_mss(struct sock *sk, u32 pmtu)
+unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
@@ -536,23 +650,71 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	tp->pmtu_cookie = pmtu;
 	tp->mss_cache = tp->mss_cache_std = mss_now;
 
-	if (sk->sk_route_caps & NETIF_F_TSO) {
-		int large_mss;
+	return mss_now;
+}
+
+/* Compute the current effective MSS, taking SACKs and IP options,
+ * and even PMTU discovery events into account.
+ *
+ * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
+ * cannot be large. However, taking into account rare use of URG, this
+ * is not a big flaw.
+ */
+
+unsigned int tcp_current_mss(struct sock *sk, int large)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+	unsigned int do_large, mss_now;
+
+	mss_now = tp->mss_cache_std;
+	if (dst) {
+		u32 mtu = dst_pmtu(dst);
+		if (mtu != tp->pmtu_cookie ||
+		    tp->ext2_header_len != dst->header_len)
+			mss_now = tcp_sync_mss(sk, mtu);
+	}
+
+	do_large = (large &&
+		    (sk->sk_route_caps & NETIF_F_TSO) &&
+		    !tp->urg_mode);
+
+	if (do_large) {
+		unsigned int large_mss, factor, limit;
 
 		large_mss = 65535 - tp->af_specific->net_header_len -
-			tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
+			tp->ext_header_len - tp->ext2_header_len -
+			tp->tcp_header_len;
 
 		if (tp->max_window && large_mss > (tp->max_window>>1))
-			large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
+			large_mss = max((tp->max_window>>1),
+					68U - tp->tcp_header_len);
+
+		factor = large_mss / mss_now;
 
-		/* Always keep large mss multiple of real mss. */
-		tp->mss_cache = mss_now*(large_mss/mss_now);
+		/* Always keep large mss multiple of real mss, but
+		 * do not exceed 1/tso_win_divisor of the congestion window
+		 * so we can keep the ACK clock ticking and minimize
+		 * bursting.
+		 */
+		limit = tp->snd_cwnd;
+		if (sysctl_tcp_tso_win_divisor)
+			limit /= sysctl_tcp_tso_win_divisor;
+		limit = max(1U, limit);
+		if (factor > limit)
+			factor = limit;
+
+		tp->mss_cache = mss_now * factor;
+
+		mss_now = tp->mss_cache;
 	}
 
+	if (tp->eff_sacks)
+		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+			    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
 	return mss_now;
 }
 
-
 /* This routine writes packets to the network. It advances the
 * send_head. This happens as incoming acks open up the remote
 * window for us.
@@ -592,8 +754,12 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
 			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
 				break;
-			/* Advance the send_head. This one is sent out. */
+
+			/* Advance the send_head. This one is sent out.
+			 * This call will increment packets_out.
+			 */
 			update_send_head(sk, tp, skb);
+
 			tcp_minshall_update(tp, mss_now, skb);
 			sent_pkts = 1;
 		}
@@ -603,7 +769,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
 			return 0;
 		}
 
-		return !tp->packets_out && sk->sk_send_head;
+		return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head;
 	}
 	return 0;
 }
@@ -729,7 +895,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
 	 */
-	if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
+	if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
 		int skb_size = skb->len, next_skb_size = next_skb->len;
 		u16 flags = TCP_SKB_CB(skb)->flags;
@@ -749,6 +915,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		    ((skb_size + next_skb_size) > mss_now))
 			return;
 
+		BUG_ON(tcp_skb_pcount(skb) != 1 ||
+		       tcp_skb_pcount(next_skb) != 1);
+
 		/* Ok. We will be able to collapse the packet. */
 		__skb_unlink(next_skb, next_skb->list);
@@ -772,24 +941,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		 */
 		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
 		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
-			tp->retrans_out--;
+			tcp_dec_pcount(&tp->retrans_out, next_skb);
 		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
-			tp->lost_out--;
-			tp->left_out--;
+			tcp_dec_pcount(&tp->lost_out, next_skb);
+			tcp_dec_pcount(&tp->left_out, next_skb);
 		}
 		/* Reno case is special. Sigh... */
-		if (!tp->sack_ok && tp->sacked_out) {
-			tp->sacked_out--;
-			tp->left_out--;
+		if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) {
+			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+			tcp_dec_pcount(&tp->left_out, next_skb);
 		}
 		/* Not quite right: it can be > snd.fack, but
 		 * it is better to underestimate fackets.
 		 */
-		if (tp->fackets_out)
-			tp->fackets_out--;
+		tcp_dec_pcount_approx(&tp->fackets_out, next_skb);
+		tcp_packets_out_dec(tp, next_skb);
 		sk_stream_free_skb(sk, next_skb);
-		tp->packets_out--;
 	}
 }
@@ -809,11 +977,11 @@ void tcp_simple_retransmit(struct sock *sk)
 		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out--;
+				tcp_dec_pcount(&tp->retrans_out, skb);
 			}
 			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-				tp->lost_out++;
+				tcp_inc_pcount(&tp->lost_out, skb);
 				lost = 1;
 			}
 		}
@@ -879,12 +1047,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
 		return -EAGAIN;
 
-	if(skb->len > cur_mss) {
-		if(tcp_fragment(sk, skb, cur_mss))
+	if (skb->len > cur_mss) {
+		int old_factor = tcp_skb_pcount(skb);
+		int new_factor;
+
+		if (tcp_fragment(sk, skb, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
 
 		/* New SKB created, account for it. */
-		tp->packets_out++;
+		new_factor = tcp_skb_pcount(skb);
+		tcp_dec_pcount_explicit(&tp->packets_out,
+					old_factor - new_factor);
+		tcp_inc_pcount(&tp->packets_out, skb->next);
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -908,6 +1082,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
+			skb_shinfo(skb)->tso_segs = 1;
+			skb_shinfo(skb)->tso_size = 0;
 			skb->ip_summed = CHECKSUM_NONE;
 			skb->csum = 0;
 		}
@@ -933,7 +1109,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 	TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
-	tp->retrans_out++;
+	tcp_inc_pcount(&tp->retrans_out, skb);
 
 	/* Save stamp of the first retransmit. */
 	if (!tp->retrans_stamp)
@@ -961,13 +1137,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt = tp->lost_out;
+	int packet_cnt = tcp_get_pcount(&tp->lost_out);
 
 	/* First pass: retransmit lost packets. */
 	if (packet_cnt) {
 		sk_stream_for_retrans_queue(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			/* Assume this retransmit will generate
+			 * only one packet for congestion window
+			 * calculation purposes. This works because
+			 * tcp_retransmit_skb() will chop up the
+			 * packet to be MSS sized and all the
+			 * packet counting works out.
+			 */
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				return;
@@ -985,7 +1168,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 					tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
 			}
 
-			if (--packet_cnt <= 0)
+			packet_cnt -= tcp_skb_pcount(skb);
+			if (packet_cnt <= 0)
 				break;
 		}
 	}
@@ -1004,7 +1188,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	/* Yeah, we have to make difficult choice between forward transmission
 	 * and retransmission... Both ways have their merits...
 	 *
-	 * For now we do not retrnamsit anything, while we have some new
+	 * For now we do not retransmit anything, while we have some new
 	 * segments to send.
 	 */
@@ -1014,17 +1198,23 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		packet_cnt = 0;
 
 		sk_stream_for_retrans_queue(skb, sk) {
-			if(++packet_cnt > tp->fackets_out)
+			/* Similar to the retransmit loop above we
+			 * can pretend that the retransmitted SKB
+			 * we send out here will be composed of one
+			 * real MSS sized packet because tcp_retransmit_skb()
+			 * will fragment it if necessary.
+			 */
+			if (++packet_cnt > tcp_get_pcount(&tp->fackets_out))
 				break;
 
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				break;
 
-			if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
 				continue;
 
 			/* Ok, retransmit it. */
-			if(tcp_retransmit_skb(sk, skb))
+			if (tcp_retransmit_skb(sk, skb))
 				break;
 
 			if (skb == skb_peek(&sk->sk_write_queue))
@@ -1042,13 +1232,13 @@ void tcp_send_fin(struct sock *sk)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
-	unsigned int mss_now;
+	int mss_now;
 
 	/* Optimization, tack on the FIN if we have a queue of
 	 * unsent frames. But be careful about outgoing SACKS
 	 * and IP options.
	 */
-	mss_now = tcp_current_mss(sk, 1);
+	mss_now = tcp_current_mss(sk, 1);
 
 	if (sk->sk_send_head != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -1068,6 +1258,8 @@ void tcp_send_fin(struct sock *sk)
 		skb->csum = 0;
 		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
 		TCP_SKB_CB(skb)->sacked = 0;
+		skb_shinfo(skb)->tso_segs = 1;
+		skb_shinfo(skb)->tso_size = 0;
 
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		TCP_SKB_CB(skb)->seq = tp->write_seq;
@@ -1099,6 +1291,8 @@ void tcp_send_active_reset(struct sock *sk, int priority)
 	skb->csum = 0;
 	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
 	TCP_SKB_CB(skb)->sacked = 0;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
 
 	/* Send it off. */
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
@@ -1178,6 +1372,9 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->dest = req->rmt_port;
 	TCP_SKB_CB(skb)->seq = req->snt_isn;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+	TCP_SKB_CB(skb)->sacked = 0;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(req->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -1233,7 +1430,7 @@ static inline void tcp_connect_init(struct sock *sk)
 	tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(sk);
-	tcp_vegas_init(tp);
+	tcp_ca_init(tp);
 
 	tcp_select_initial_window(tcp_full_space(sk),
 				  tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
@@ -1279,19 +1476,21 @@ int tcp_connect(struct sock *sk)
 	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
 	TCP_ECN_send_syn(sk, tp, buff);
 	TCP_SKB_CB(buff)->sacked = 0;
+	skb_shinfo(buff)->tso_segs = 1;
+	skb_shinfo(buff)->tso_size = 0;
 	buff->csum = 0;
 	TCP_SKB_CB(buff)->seq = tp->write_seq++;
 	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
-	tcp_vegas_init(tp);
+	tcp_ca_init(tp);
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
-	tp->packets_out++;
+	tcp_inc_pcount(&tp->packets_out, buff);
 	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
@@ -1378,6 +1577,8 @@ void tcp_send_ack(struct sock *sk)
 		buff->csum = 0;
 		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
 		TCP_SKB_CB(buff)->sacked = 0;
+		skb_shinfo(buff)->tso_segs = 1;
+		skb_shinfo(buff)->tso_size = 0;
 
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
@@ -1412,6 +1613,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	skb->csum = 0;
 	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(skb)->sacked = urgent;
+	skb_shinfo(skb)->tso_segs = 1;
+	skb_shinfo(skb)->tso_size = 0;
 
 	/* Use a previous sequence. This should cause the other
 	 * end to send an ack. Don't queue or clone SKB, just
@@ -1432,8 +1635,8 @@ int tcp_write_wakeup(struct sock *sk)
 	if ((skb = sk->sk_send_head) != NULL &&
 	    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
 		int err;
-		int mss = tcp_current_mss(sk, 0);
-		int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
 
 		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
 			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1455,7 +1658,9 @@ int tcp_write_wakeup(struct sock *sk)
 				sk->sk_route_caps &= ~NETIF_F_TSO;
 				tp->mss_cache = tp->mss_cache_std;
 			}
-		}
+		} else if (!tcp_skb_pcount(skb))
+			tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
@@ -1483,7 +1688,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk);
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
 		/* Cancel probe timer, if it is not required. */
 		tp->probes_out = 0;
 		tp->backoff = 0;
@@ -1517,6 +1722,5 @@ EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_send_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
-EXPORT_SYMBOL(tcp_transmit_skb);
 EXPORT_SYMBOL(tcp_write_wakeup);
 EXPORT_SYMBOL(tcp_write_xmit);
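Illustration (not part of the patch above): the two calculations the diff introduces are easy to check in isolation. The standalone C sketch below mirrors, under the same variable names, the ceiling division tcp_set_skb_tso_segs() uses to derive a segment count from skb->len and mss_cache_std, and the sysctl_tcp_tso_win_divisor clamp applied in tcp_current_mss() before mss_cache is scaled up. The helper functions here are hypothetical userspace stand-ins, not kernel interfaces.

/* Illustrative only -- not kernel code. */
#include <stdio.h>

/* Segment count for a queued buffer: ceil(len / mss_std), with the cheap
 * shortcut for buffers that fit in a single MSS, as in tcp_set_skb_tso_segs(). */
static unsigned int tso_segs(unsigned int len, unsigned int mss_std)
{
	if (len <= mss_std)
		return 1;
	return (len + mss_std - 1) / mss_std;
}

/* TSO super-frame factor: at most large_mss/mss_now real segments, but never
 * more than snd_cwnd / sysctl_tcp_tso_win_divisor (and never less than one),
 * as in the tcp_current_mss() hunk above. */
static unsigned int tso_factor(unsigned int large_mss, unsigned int mss_now,
			       unsigned int snd_cwnd, unsigned int win_divisor)
{
	unsigned int factor = large_mss / mss_now;
	unsigned int limit = snd_cwnd;

	if (win_divisor)
		limit /= win_divisor;
	if (limit < 1)
		limit = 1;
	if (factor > limit)
		factor = limit;
	return factor;
}

int main(void)
{
	/* 4344 bytes of queued payload at a 1448-byte MSS -> 3 segments. */
	printf("tso_segs(4344, 1448) = %u\n", tso_segs(4344, 1448));

	/* snd_cwnd = 40 and the default divisor of 8 cap a TSO frame at
	 * 5 segments, even though the 64 KB limit alone would allow ~45
	 * (65535 minus rough header overhead, divided by the MSS). */
	printf("factor = %u\n", tso_factor(65535 - 52, 1448, 40, 8));
	return 0;
}

With the default divisor of 8, a connection with a 40-segment congestion window builds TSO frames of at most 5 * mss_now, which keeps ACKs returning often enough to clock out the rest of the window instead of releasing it in one burst.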