X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_output.c;h=68ab99fc0370190e9a3a3aa4fa95287209e69710;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=bc5fba4a39fd0bbd578d21bc0aefd1c2be7f326d;hpb=db216c3d5e4c040e557a50f8f5d35d5c415e8c1c;p=linux-2.6.git diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bc5fba4a3..68ab99fc0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -48,9 +48,9 @@ int sysctl_tcp_retrans_collapse = 1; static __inline__ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) { - tp->send_head = skb->next; - if (tp->send_head == (struct sk_buff *)&sk->sk_write_queue) - tp->send_head = NULL; + sk->sk_send_head = skb->next; + if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) + sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -168,6 +168,14 @@ static __inline__ u16 tcp_select_window(struct sock *sk) tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; + /* Make sure we do not exceed the maximum possible + * scaled window. + */ + if (!tp->rcv_wscale) + new_win = min(new_win, MAX_TCP_WINDOW); + else + new_win = min(new_win, (65535U << tp->rcv_wscale)); + /* RFC1323 scaling applied */ new_win >>= tp->rcv_wscale; @@ -291,7 +299,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TcpOutSegs); + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = tp->af_specific->queue_xmit(skb, 0); if (err <= 0) @@ -326,11 +334,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) /* Advance write_seq and place onto the write_queue. */ tp->write_seq = TCP_SKB_CB(skb)->end_seq; __skb_queue_tail(&sk->sk_write_queue, skb); - tcp_charge_skb(sk, skb); + sk_charge_skb(sk, skb); /* Queue it, remembering where we must start sending. */ - if (tp->send_head == NULL) - tp->send_head = skb; + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; } /* Send _single_ skb sitting at the send head. This function requires @@ -339,13 +347,13 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) void tcp_push_one(struct sock *sk, unsigned cur_mss) { struct tcp_opt *tp = tcp_sk(sk); - struct sk_buff *skb = tp->send_head; + struct sk_buff *skb = sk->sk_send_head; if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) { - tp->send_head = NULL; + sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -354,70 +362,6 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) } } -/* Split fragmented skb to two parts at length len. */ - -static void skb_split(struct sk_buff *skb, struct sk_buff *skb1, u32 len) -{ - int i; - int pos = skb_headlen(skb); - - if (len < pos) { - /* Split line is inside header. */ - memcpy(skb_put(skb1, pos-len), skb->data + len, pos-len); - - /* And move data appendix as is. 
*/
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
-
- skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
- skb_shinfo(skb)->nr_frags = 0;
-
- skb1->data_len = skb->data_len;
- skb1->len += skb1->data_len;
- skb->data_len = 0;
- skb->len = len;
- skb->tail = skb->data+len;
- } else {
- int k = 0;
- int nfrags = skb_shinfo(skb)->nr_frags;
-
- /* Second chunk has no header, nothing to copy. */
-
- skb_shinfo(skb)->nr_frags = 0;
- skb1->len = skb1->data_len = skb->len - len;
- skb->len = len;
- skb->data_len = len - pos;
-
- for (i=0; i<nfrags; i++) {
- int size = skb_shinfo(skb)->frags[i].size;
- if (pos + size > len) {
- skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
-
- if (pos < len) {
- /* Split frag.
- * We have two variants in this case:
- * 1. Move all the frag to the second
- * part, if it is possible. F.e.
- * this approach is mandatory for TUX,
- * where splitting is expensive.
- * 2. Split is accurately. We make this.
- */
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb1)->frags[0].page_offset += (len-pos);
- skb_shinfo(skb1)->frags[0].size -= (len-pos);
- skb_shinfo(skb)->frags[i].size = len-pos;
- skb_shinfo(skb)->nr_frags++;
- }
- k++;
- } else {
- skb_shinfo(skb)->nr_frags++;
- }
- pos += size;
- }
- skb_shinfo(skb1)->nr_frags = k;
- }
-}
-
 /* Function to create two new TCP segments. Shrinks the given segment
 * to the specified size and appends a new segment with the rest of the
 * packet to the list. This won't be called frequently, I hope.
@@ -436,10 +380,10 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 return -ENOMEM;
 
 /* Get a new skb... force flag on. */
- buff = tcp_alloc_skb(sk, nsize, GFP_ATOMIC);
+ buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
 if (buff == NULL)
 return -ENOMEM; /* We'll just try again later. */
- tcp_charge_skb(sk, buff);
+ sk_charge_skb(sk, buff);
 
 /* Correct the sequence numbers. */
 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
@@ -636,8 +580,10 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
 */
 mss_now = tcp_current_mss(sk, 1);
 
- while((skb = tp->send_head) &&
- tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)) {
+ while ((skb = sk->sk_send_head) &&
+ tcp_snd_test(tp, skb, mss_now,
+ tcp_skb_is_last(sk, skb) ? nonagle :
+ TCP_NAGLE_PUSH)) {
 if (skb->len > mss_now) {
 if (tcp_fragment(sk, skb, mss_now))
 break;
@@ -657,7 +603,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
 return 0;
 }
 
- return !tp->packets_out && tp->send_head;
+ return !tp->packets_out && sk->sk_send_head;
 }
 return 0;
 }
@@ -744,17 +690,32 @@ u32 __tcp_select_window(struct sock *sk)
 if (free_space > tp->rcv_ssthresh)
 free_space = tp->rcv_ssthresh;
 
- /* Get the largest window that is a nice multiple of mss.
- * Window clamp already applied above.
- * If our current window offering is within 1 mss of the
- * free space we just keep it. This prevents the divide
- * and multiply from happening most of the time.
- * We also don't do any window rounding when the free space
- * is too small.
+ /* Don't do rounding if we are using window scaling, since the
+ * scaled window will not line up with the MSS boundary anyway.
 */
 window = tp->rcv_wnd;
- if (window <= free_space - mss || window > free_space)
- window = (free_space/mss)*mss;
+ if (tp->rcv_wscale) {
+ window = free_space;
+
+ /* Advertise enough space so that it won't get scaled away.
+ * Import case: prevent zero window announcement if
+ * 1<<rcv_wscale > mss.
+ */ + if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window) + window = (((window >> tp->rcv_wscale) + 1) + << tp->rcv_wscale); + } else { + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. + * If our current window offering is within 1 mss of the + * free space we just keep it. This prevents the divide + * and multiply from happening most of the time. + * We also don't do any window rounding when the free space + * is too small. + */ + if (window <= free_space - mss || window > free_space) + window = (free_space/mss)*mss; + } return window; } @@ -827,7 +788,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ if (tp->fackets_out) tp->fackets_out--; - tcp_free_skb(sk, next_skb); + sk_stream_free_skb(sk, next_skb); tp->packets_out--; } } @@ -843,7 +804,7 @@ void tcp_simple_retransmit(struct sock *sk) unsigned int mss = tcp_current_mss(sk, 0); int lost = 0; - for_retrans_queue(skb, sk, tp) { + sk_stream_for_retrans_queue(skb, sk) { if (skb->len > mss && !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { @@ -929,7 +890,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (skb->next != tp->send_head) && + (skb->next != sk->sk_send_head) && (skb->next != (struct sk_buff *)&sk->sk_write_queue) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && (sysctl_tcp_retrans_collapse != 0)) @@ -963,7 +924,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (err == 0) { /* Update global TCP statistics. */ - TCP_INC_STATS(TcpRetransSegs); + TCP_INC_STATS(TCP_MIB_RETRANSSEGS); #if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { @@ -1004,7 +965,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* First pass: retransmit lost packets. */ if (packet_cnt) { - for_retrans_queue(skb, sk, tp) { + sk_stream_for_retrans_queue(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) @@ -1015,9 +976,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tcp_retransmit_skb(sk, skb)) return; if (tp->ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(TCPFastRetrans); + NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else - NET_INC_STATS_BH(TCPSlowStartRetrans); + NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); if (skb == skb_peek(&sk->sk_write_queue)) @@ -1052,7 +1013,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) packet_cnt = 0; - for_retrans_queue(skb, sk, tp) { + sk_stream_for_retrans_queue(skb, sk) { if(++packet_cnt > tp->fackets_out) break; @@ -1069,7 +1030,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - NET_INC_STATS_BH(TCPForwardRetrans); + NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } } @@ -1089,7 +1050,7 @@ void tcp_send_fin(struct sock *sk) */ mss_now = tcp_current_mss(sk, 1); - if(tp->send_head != NULL) { + if (sk->sk_send_head != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; @@ -1129,7 +1090,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) /* NOTE: No TCP options attached and we never retransmit this. 
*/ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { - NET_INC_STATS(TCPAbortFailed); + NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); return; } @@ -1144,7 +1105,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb)) - NET_INC_STATS(TCPAbortFailed); + NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); } /* WARNING: This routine must only be called when we have already sent @@ -1168,8 +1129,8 @@ int tcp_send_synack(struct sock *sk) return -ENOMEM; __skb_unlink(skb, &sk->sk_write_queue); __skb_queue_head(&sk->sk_write_queue, nskb); - tcp_free_skb(sk, skb); - tcp_charge_skb(sk, nskb); + sk_stream_free_skb(sk, skb); + sk_charge_skb(sk, nskb); skb = nskb; } @@ -1244,7 +1205,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb->csum = 0; th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(TcpOutSegs); + TCP_INC_STATS(TCP_MIB_OUTSEGS); return skb; } @@ -1329,10 +1290,10 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->when = tcp_time_stamp; tp->retrans_stamp = TCP_SKB_CB(buff)->when; __skb_queue_tail(&sk->sk_write_queue, buff); - tcp_charge_skb(sk, buff); + sk_charge_skb(sk, buff); tp->packets_out++; tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); - TCP_INC_STATS(TcpActiveOpens); + TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -1468,7 +1429,7 @@ int tcp_write_wakeup(struct sock *sk) struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; - if ((skb = tp->send_head) != NULL && + if ((skb = sk->sk_send_head) != NULL && before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; int mss = tcp_current_mss(sk, 0); @@ -1522,7 +1483,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk); - if (tp->packets_out || !tp->send_head) { + if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ tp->probes_out = 0; tp->backoff = 0;
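
The frag walk in the removed skb_split() above is worth tracing once in isolation: the buffer is cut at offset len, frags entirely below the cut stay put, frags entirely above it move to the second skb, and at most one frag straddles the cut and is itself split (with get_page() taken on the shared page). A minimal user-space sketch of just that arithmetic, with no skb structures; split_frags() and its parameters are illustrative names, not kernel code:

#include <stdio.h>

/* Walk page fragments the way skb_split()'s else-branch does: pos tracks
 * how many bytes precede the current frag, and the cut at `len` decides
 * where each frag lands.
 */
static void split_frags(int headlen, const int *frag, int nfrags, int len)
{
	int pos = headlen;	/* bytes accounted for so far */
	int i;

	for (i = 0; i < nfrags; i++) {
		if (pos + frag[i] <= len) {
			printf("frag %d (%d bytes): stays in first skb\n",
			       i, frag[i]);
		} else if (pos < len) {
			/* Cut falls inside this frag: the first part keeps
			 * len-pos bytes, the remainder moves (the real code
			 * bumps the page refcount with get_page() here). */
			printf("frag %d (%d bytes): split %d/%d\n",
			       i, frag[i], len - pos, frag[i] - (len - pos));
		} else {
			printf("frag %d (%d bytes): moves to second skb\n",
			       i, frag[i]);
		}
		pos += frag[i];
	}
}

int main(void)
{
	const int frags[] = { 4096, 4096, 2048 };

	/* Cut a 200-byte header plus three frags at offset 6000: frag 0
	 * stays, frag 1 is split 1704/2392, frag 2 moves. */
	split_frags(200, frags, 3, 6000);
	return 0;
}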
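
The two receive-window hunks are easiest to see together. tcp_select_window() now clamps the raw window before the RFC 1323 right shift, so the 16-bit header field cannot overflow (MAX_TCP_WINDOW for unscaled peers, 65535 << rcv_wscale otherwise), while __tcp_select_window() rounds free space up to a multiple of 1 << rcv_wscale so a small but nonzero window is not truncated into a spurious zero-window announcement. Below is a minimal user-space sketch of that arithmetic under stated assumptions: scale_window() and round_up_window() are illustrative names rather than kernel functions, and MAX_TCP_WINDOW's value matches include/net/tcp.h of this era.

#include <stdio.h>

#define MAX_TCP_WINDOW 32767U	/* safe against peers that treat the window as signed */

/* Clamp, then apply the RFC 1323 shift, as in the tcp_select_window() hunk. */
static unsigned int scale_window(unsigned int new_win, unsigned int rcv_wscale)
{
	if (!rcv_wscale) {
		if (new_win > MAX_TCP_WINDOW)
			new_win = MAX_TCP_WINDOW;
	} else if (new_win > (65535U << rcv_wscale)) {
		new_win = 65535U << rcv_wscale;
	}
	return new_win >> rcv_wscale;	/* the value placed in the TCP header */
}

/* Round up so the shift cannot scale the window away to zero, as in the
 * __tcp_select_window() hunk (matters when 1<<rcv_wscale > mss). */
static unsigned int round_up_window(unsigned int free_space, unsigned int rcv_wscale)
{
	unsigned int window = free_space;

	if (((window >> rcv_wscale) << rcv_wscale) != window)
		window = ((window >> rcv_wscale) + 1) << rcv_wscale;
	return window;
}

int main(void)
{
	unsigned int free_space = 1000, wscale = 10;
	unsigned int window = round_up_window(free_space, wscale);

	/* Without rounding, 1000 >> 10 == 0: a zero window despite free space. */
	printf("free %u -> window %u -> header field %u\n",
	       free_space, window, scale_window(window, wscale));
	return 0;
}

With wscale 10 and only 1000 bytes free, the unrounded shift would advertise a zero window even though space is available; the rounded window of 1024 advertises a header field of 1, which the peer scales back up to 1024 bytes.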