int sysctl_tcp_westwood;
int sysctl_tcp_vegas_cong_avoid;
+int sysctl_tcp_moderate_rcvbuf;
+
/* Default values of the Vegas variables, in fixed-point representation
* with V_PARAM_SHIFT bits to the right of the binary point.
*/
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_fixup_sndbuf(sk);
+ tp->rcvq_space.space = tp->rcv_wnd;
+
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
}
}
+/* Receiver "autotuning" code.
+ *
+ * The algorithm for RTT estimation w/o timestamps is based on
+ * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
+ * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ *
+ * More detail on this code can be found at
+ * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * though this reference is out of date. A new paper
+ * is pending.
+ */
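+/* Note: like srtt, rcv_rtt_est.rtt is kept in fixed point, shifted
+ * left by 3 bits; the stored value is 8 * rtt.
+ */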
+static void tcp_rcv_rtt_update(struct tcp_opt *tp, u32 sample, int win_dep)
+{
+ u32 new_sample = tp->rcv_rtt_est.rtt;
+ long m = sample;
+
+ if (m == 0)
+ m = 1;
+
+ if (new_sample != 0) {
+		/* If we sampled over larger windows in the non-timestamp
+		 * case, we could grossly overestimate the RTT, especially
+		 * with chatty applications or bulk transfer apps which
+		 * are stalled on filesystem I/O.
+		 *
+		 * Also, since we are only going for a minimum in the
+		 * non-timestamp case, we do not smooth things out;
+		 * otherwise, with timestamps disabled, convergence
+		 * takes too long.
+		 */
+ if (!win_dep) {
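+			/* Timestamp case: a standard EWMA with gain 1/8,
+			 * like srtt.  new_sample holds rtt << 3, so this
+			 * computes rtt = 7/8 * rtt + 1/8 * sample.
+			 */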
+ m -= (new_sample >> 3);
+ new_sample += m;
+		} else {
+			/* Compare in the same fixed-point scale as
+			 * new_sample, so this really tracks a minimum.
+			 */
+			m <<= 3;
+			if (m < new_sample)
+				new_sample = m;
+		}
+ } else {
+		/* No previous measurement. */
+ new_sample = m << 3;
+ }
+
+ if (tp->rcv_rtt_est.rtt != new_sample)
+ tp->rcv_rtt_est.rtt = new_sample;
+}
+
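+/* Without timestamps we cannot time individual segments at the
+ * receiver, so instead we measure how long it takes to receive one
+ * full advertised window of data.  As long as the sender keeps the
+ * pipe full, that interval is roughly one round-trip time.
+ */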
+static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp)
+{
+ if (tp->rcv_rtt_est.time == 0)
+ goto new_measure;
+ if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
+ return;
+	tcp_rcv_rtt_update(tp,
+			   tcp_time_stamp - tp->rcv_rtt_est.time,
+			   1);
+
+new_measure:
+ tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
+ tp->rcv_rtt_est.time = tcp_time_stamp;
+}
+
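+/* With timestamps, every echoed timestamp yields an RTT sample.
+ * Segments smaller than rcv_mss are skipped, presumably because
+ * their timing tends to be dominated by delayed ACKs and
+ * interactive traffic patterns rather than by the path RTT.
+ */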
+static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp,
+					   struct sk_buff *skb)
+{
+ if (tp->rcv_tsecr &&
+ (TCP_SKB_CB(skb)->end_seq -
+ TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss))
+ tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0);
+}
+
+/*
+ * This function should be called every time data is copied to user space.
+ * It calculates the appropriate TCP receive buffer space.
+ */
+void tcp_rcv_space_adjust(struct sock *sk)
+{
+ struct tcp_opt *tp = tcp_sk(sk);
+ int time;
+ int space;
+
+ if (tp->rcvq_space.time == 0)
+ goto new_measure;
+
+ time = tcp_time_stamp - tp->rcvq_space.time;
+ if (time < (tp->rcv_rtt_est.rtt >> 3) ||
+ tp->rcv_rtt_est.rtt == 0)
+ return;
+
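+	/* The data consumed by the application over roughly one RTT
+	 * approximates one sender congestion window; double it so the
+	 * advertised window stays ahead of a still-growing sender.
+	 */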
+ space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+
+ space = max(tp->rcvq_space.space, space);
+
+ if (tp->rcvq_space.space != space) {
+ int rcvmem;
+
+ tp->rcvq_space.space = space;
+
+ if (sysctl_tcp_moderate_rcvbuf) {
+ int new_clamp = space;
+
+			/* The receive space has grown; normalize it to
+			 * account for packet headers and sk_buff
+			 * structure overhead.
+			 */
+ space /= tp->advmss;
+ if (!space)
+ space = 1;
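+			/* rcvmem approximates the true memory cost of one
+			 * advmss-sized segment, including protocol headers
+			 * and sk_buff overhead; grow it until the usable
+			 * window share of that space covers one advmss.
+			 */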
+ rcvmem = (tp->advmss + MAX_TCP_HEADER +
+ 16 + sizeof(struct sk_buff));
+ while (tcp_win_from_space(rcvmem) < tp->advmss)
+ rcvmem += 128;
+ space *= rcvmem;
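+			/* Clamp to the administrative maximum, and only
+			 * ever grow the socket buffer, never shrink it.
+			 */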
+ space = min(space, sysctl_tcp_rmem[2]);
+ if (space > sk->sk_rcvbuf) {
+ sk->sk_rcvbuf = space;
+
+ /* Make the window clamp follow along. */
+ tp->window_clamp = new_clamp;
+ }
+ }
+ }
+
+new_measure:
+ tp->rcvq_space.seq = tp->copied_seq;
+ tp->rcvq_space.time = tcp_time_stamp;
+}
+
/* There is something which you must keep in mind when you analyze the
* behavior of the tp->ato delayed ack timeout interval. When a
* connection starts up, we want to ack as quickly as possible. The
tcp_measure_rcv_mss(tp, skb);
+ tcp_rcv_rtt_measure(tp);
+
now = tcp_time_stamp;
if (!tp->ack.ato) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
eaten = (chunk == skb->len && !th->fin);
+ tcp_rcv_space_adjust(sk);
}
local_bh_disable();
}
if (!err) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
+ tcp_rcv_space_adjust(sk);
}
local_bh_disable();
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
+
+ tcp_rcv_rtt_measure_ts(tp, skb);
+
/* We know that such packets are checksummed
* on entry.
*/
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
+ tcp_rcv_rtt_measure_ts(tp, skb);
+
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(TCPHPHitsToUser);
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
+ tcp_rcv_rtt_measure_ts(tp, skb);
+
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
if(th->ack)
tcp_ack(sk, skb, FLAG_SLOWPATH);
+ tcp_rcv_rtt_measure_ts(tp, skb);
+
/* Process urgent data. */
tcp_urg(sk, skb, th);