* Pasi Sarolahti: F-RTO for dealing with spurious RTOs
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
+#include <net/netdma.h>
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1;
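+/* Appropriate Byte Counting (RFC 3465): dropping the initializer below
+ * makes the sysctl default to 0, so ABC is now off unless enabled.
+ */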
-int sysctl_tcp_abc = 1;
+int sysctl_tcp_abc;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
tp->snd_cwnd_stamp = tcp_time_stamp;
}
+/* Lower bound on congestion window is slow start threshold
+ * unless the congestion avoidance algorithm chooses to override it.
+ */
+static inline u32 tcp_cwnd_min(const struct sock *sk)
+{
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+
+ return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
+}
+
/* Decrease cwnd each second ack. */
static void tcp_cwnd_down(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int decr = tp->snd_cwnd_cnt + 1;
tp->snd_cwnd_cnt = decr&1;
decr >>= 1;
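+ /* Shrink cwnd by one segment on every second ACK, but not below
+ * the bound from tcp_cwnd_min().
+ */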
- if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk))
+ if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
tp->snd_cwnd -= decr;
tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
return acked;
}
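+/* Microsecond-resolution RTT for congestion control modules, measured
+ * from the transmit timestamp the caller read with skb_get_timestamp().
+ */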
-static u32 tcp_usrtt(const struct sk_buff *skb)
+static u32 tcp_usrtt(struct timeval *tv)
{
- struct timeval tv, now;
+ struct timeval now;
do_gettimeofday(&now);
- skb_get_timestamp(skb, &tv);
- return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+ return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
}
/* Remove acknowledged frames from the retransmission queue. */
u32 pkts_acked = 0;
void (*rtt_sample)(struct sock *sk, u32 usrtt)
= icsk->icsk_ca_ops->rtt_sample;
+ struct timeval tv;
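+ /* Transmit time of the last newly ACKed skb; sampled once after the loop. */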
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
seq_rtt = -1;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- if (rtt_sample)
- (*rtt_sample)(sk, tcp_usrtt(skb));
+ skb_get_timestamp(skb, &tv);
}
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
}
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- if (rtt_sample)
- (*rtt_sample)(sk, tcp_usrtt(skb));
+ skb_get_timestamp(skb, &tv);
}
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
if (acked&FLAG_ACKED) {
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk, tp);
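+ /* Karn's rule: never sample RTT from an ACK that covers
+ * retransmitted data, since it is ambiguous which transmission
+ * the peer is acknowledging.
+ */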
+ if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
+ (*rtt_sample)(sk, tcp_usrtt(&tv));
if (icsk->icsk_ca_ops->pkts_acked)
icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
if (before(ack, prior_snd_una))
goto old_ack;
- if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
- tp->bytes_acked += ack - prior_snd_una;
+ if (sysctl_tcp_abc) {
+ if (icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+ else if (icsk->icsk_ca_state == TCP_CA_Loss)
+ /* we assume just one segment has left the network */
+ tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+ }
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
- u32 win_used = max(tp->snd_cwnd_used, 2U);
+ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ u32 win_used = max(tp->snd_cwnd_used, init_win);
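+ /* Never count less than the initial window as "used", so an idle
+ * application cannot collapse cwnd below its restart value
+ * (cf. RFC 2861 congestion window validation).
+ */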
if (win_used < tp->snd_cwnd) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
__tcp_checksum_complete_user(sk, skb);
}
+#ifdef CONFIG_NET_DMA
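+/* Try to hand the payload copy to an async DMA engine while the socket
+ * is being processed; returns nonzero if a copy was issued, in which
+ * case the skb is parked on sk_async_wait_queue instead of being freed.
+ */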
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int chunk = skb->len - hlen;
+ int dma_cookie;
+ int copied_early = 0;
+
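+ /* A user-space wakeup is already pending; let the normal path run. */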
+ if (tp->ucopy.wakeup)
+ return 0;
+
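+ /* Acquire a DMA channel lazily, and only for pinned user buffers. */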
+ if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+ tp->ucopy.dma_chan = get_softnet_dma();
+
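+ /* The DMA engine only copies; the checksum must already be verified. */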
+ if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+
+ dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
+ skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+
+ if (dma_cookie < 0)
+ goto out;
+
+ tp->ucopy.dma_cookie = dma_cookie;
+ copied_early = 1;
+
+ tp->ucopy.len -= chunk;
+ tp->copied_seq += chunk;
+ tcp_rcv_space_adjust(sk);
+
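+ /* Wake the user once the iovec is full, a PSH arrives, or receive
+ * memory crosses half of sk_rcvbuf.
+ */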
+ if ((tp->ucopy.len == 0) ||
+ (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+ (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
+ tp->ucopy.wakeup = 1;
+ sk->sk_data_ready(sk, 0);
+ }
+ } else if (chunk > 0) {
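+ /* No DMA path; fall back to waking the user for a normal copy. */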
+ tp->ucopy.wakeup = 1;
+ sk->sk_data_ready(sk, 0);
+ }
+out:
+ return copied_early;
+}
+#endif /* CONFIG_NET_DMA */
+
/*
* TCP receive function for the ESTABLISHED state.
*
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
- tcp_rcv_rtt_measure_ts(sk, skb);
-
/* We know that such packets are checksummed
* on entry.
*/
}
} else {
int eaten = 0;
+ int copied_early = 0;
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len &&
- sock_owned_by_user(sk)) {
- __set_current_state(TASK_RUNNING);
+ if (tp->copied_seq == tp->rcv_nxt &&
+ len - tcp_header_len <= tp->ucopy.len) {
+#ifdef CONFIG_NET_DMA
+ if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ copied_early = 1;
+ eaten = 1;
+ }
+#endif
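+ /* Synchronous fallback: copy inline to the user iovec when the
+ * owning task is running and no early DMA copy happened.
+ */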
+ if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+ __set_current_state(TASK_RUNNING);
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
+ if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
+ eaten = 1;
+ }
+ if (eaten) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
- eaten = 1;
}
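+ /* The DMA copy consumed data; send any pending ACK/window update. */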
+ if (copied_early)
+ tcp_cleanup_rbuf(sk, skb->len);
}
if (!eaten) {
if (tcp_checksum_complete_user(sk, skb))
__tcp_ack_snd_check(sk, 0);
no_ack:
+#ifdef CONFIG_NET_DMA
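+ /* The async copy may still be in flight: park the skb until the
+ * DMA completes instead of freeing it here.
+ */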
+ if (copied_early)
+ __skb_queue_tail(&sk->sk_async_wait_queue, skb);
+ else
+#endif
if (eaten)
__kfree_skb(skb);
else
*/
TCP_ECN_rcv_synack(tp, th);
- if (tp->ecn_flags&TCP_ECN_OK)
- sock_set_flag(sk, SOCK_NO_LARGESEND);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH);
tp->max_window = tp->snd_wnd;
TCP_ECN_rcv_syn(tp, th);
- if (tp->ecn_flags&TCP_ECN_OK)
- sock_set_flag(sk, SOCK_NO_LARGESEND);
tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);