#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
-/* SLAB cache for TCP socks */
-extern kmem_cache_t *tcp_sk_cachep;
-
extern kmem_cache_t *tcp_bucket_cachep;
extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
unsigned short snum);
extern int sysctl_tcp_bic;
extern int sysctl_tcp_bic_fast_convergence;
extern int sysctl_tcp_bic_low_window;
-extern int sysctl_tcp_default_win_scale;
extern int sysctl_tcp_moderate_rcvbuf;
+extern int sysctl_tcp_tso_win_divisor;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
extern void tcp_xmit_retransmit_queue(struct sock *);
extern void tcp_simple_retransmit(struct sock *);
+extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
extern void tcp_send_probe0(struct sock *);
extern void tcp_send_partial(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, int priority);
extern int tcp_send_synack(struct sock *);
-extern int tcp_transmit_skb(struct sock *, struct sk_buff *);
extern void tcp_push_one(struct sock *, unsigned mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);
extern void tcp_delete_keepalive_timer (struct sock *);
extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
-extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
+extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
+extern unsigned int tcp_current_mss(struct sock *sk, int large);
extern const char timer_bug_msg[];
};
}
-/* Compute the current effective MSS, taking SACKs and IP options,
- * and even PMTU discovery events into account.
- *
- * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
- * cannot be large. However, taking into account rare use of URG, this
- * is not a big flaw.
- */
-
-static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
-{
- struct tcp_opt *tp = tcp_sk(sk);
- struct dst_entry *dst = __sk_dst_get(sk);
- int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) &&
- !tp->urg_mode ?
- tp->mss_cache : tp->mss_cache_std;
-
- if (dst) {
- u32 mtu = dst_pmtu(dst);
- if (mtu != tp->pmtu_cookie ||
- tp->ext2_header_len != dst->header_len)
- mss_now = tcp_sync_mss(sk, mtu);
- }
- if (tp->eff_sacks)
- mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
- return mss_now;
-}
-
/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We don't have any direct information about the MSS.
#include <net/tcp_ecn.h>
+/* Due to TSO, an SKB can be composed of multiple actual
+ * packets. To keep these tracked properly, we use this.
+ */
+static inline int tcp_skb_pcount(struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tso_segs;
+}
+
+/* This is valid iff tcp_skb_pcount() > 1. */
+static inline int tcp_skb_mss(struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tso_size;
+}
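
A minimal standalone sketch (illustrative, not part of the patch): the count kept in skb_shinfo(skb)->tso_segs is assumed to be the payload length divided by the MSS, rounded up. tso_segs() below is a hypothetical stand-in for the arithmetic tcp_set_skb_tso_segs() is expected to perform:

	#include <stdio.h>

	/* Hypothetical model of the tso_segs arithmetic. */
	static unsigned int tso_segs(unsigned int len, unsigned int mss)
	{
		return (len + mss - 1) / mss; /* round up: last seg may be short */
	}

	int main(void)
	{
		/* A 4000-byte TSO frame at an MSS of 1448 counts as 3 packets. */
		printf("%u\n", tso_segs(4000, 1448));
		return 0;
	}
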
+
+static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb)
+{
+ count->val += tcp_skb_pcount(skb);
+}
+
+static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+ count->val += amt;
+}
+
+static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+ count->val -= amt;
+}
+
+static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb)
+{
+ count->val -= tcp_skb_pcount(skb);
+}
+
+static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
+ struct sk_buff *skb)
+{
+ if (count->val) {
+ count->val -= tcp_skb_pcount(skb);
+ if ((int)count->val < 0)
+ count->val = 0;
+ }
+}
+
+static inline __u32 tcp_get_pcount(tcp_pcount_t *count)
+{
+ return count->val;
+}
+
+static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
+{
+ count->val = val;
+}
+
+static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp,
+ struct sk_buff *skb)
+{
+ int orig = tcp_get_pcount(&tp->packets_out);
+
+ tcp_inc_pcount(&tp->packets_out, skb);
+ if (!orig)
+ tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+}
+
+static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb)
+{
+ tcp_dec_pcount(&tp->packets_out, skb);
+}
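
A userspace model of the accounting above, assuming tcp_pcount_t is the plain { __u32 val; } wrapper this patch introduces: tcp_packets_out_inc() arms the retransmit timer only on the empty-to-nonempty transition, once per burst rather than once per packet. arm_rto() is a hypothetical placeholder for tcp_reset_xmit_timer():

	#include <stdio.h>

	typedef struct { unsigned int val; } tcp_pcount_t;

	static void arm_rto(void) { printf("arm retransmit timer\n"); }

	static void packets_out_inc(tcp_pcount_t *count, unsigned int pcount)
	{
		if (!count->val)	/* queue was empty: start the RTO clock */
			arm_rto();
		count->val += pcount;
	}

	int main(void)
	{
		tcp_pcount_t packets_out = { 0 };

		packets_out_inc(&packets_out, 2);	/* arms the timer */
		packets_out_inc(&packets_out, 3);	/* already armed */
		printf("in flight: %u\n", packets_out.val);	/* 5 */
		return 0;
	}
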
+
/* This determines how many packets are "in the network" to the best
* of our knowledge. In many cases it is conservative, but where
* detailed information is available from the receiver (via SACK
*/
static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
- return tp->packets_out - tp->left_out + tp->retrans_out;
+ return (tcp_get_pcount(&tp->packets_out) -
+ tcp_get_pcount(&tp->left_out) +
+ tcp_get_pcount(&tp->retrans_out));
}
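
A worked example of the formula with made-up counts: ten packets out, two SACKed, one marked lost (so left_out is three), and one retransmission in flight:

	#include <stdio.h>

	int main(void)
	{
		unsigned int packets_out = 10;	/* sent, not yet fully acked */
		unsigned int sacked_out = 2, lost_out = 1, retrans_out = 1;
		unsigned int left_out = sacked_out + lost_out;	/* 3 */

		/* packets_out - left_out + retrans_out = 10 - 3 + 1 = 8 */
		printf("in flight: %u\n", packets_out - left_out + retrans_out);
		return 0;
	}
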
+/*
+ * Which congestion algorithm is in use on the connection.
+ */
+#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS)
+#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD)
+#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC)
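
For illustration only, a standalone model of how these predicates dispatch on adv_cong; the numeric tags below are assumptions, not the patch's actual definitions:

	#include <stdio.h>

	/* Assumed tag values; the real ones live elsewhere in the patch. */
	enum { TCP_RENO, TCP_VEGAS, TCP_WESTWOOD, TCP_BIC };

	struct tcp_opt_model { int adv_cong; };

	#define tcp_is_bic(tp)      ((tp)->adv_cong == TCP_BIC)
	#define tcp_is_westwood(tp) ((tp)->adv_cong == TCP_WESTWOOD)

	int main(void)
	{
		struct tcp_opt_model tp = { TCP_BIC };

		if (tcp_is_bic(&tp))
			printf("BIC window growth\n");
		else if (tcp_is_westwood(&tp))
			printf("Westwood bandwidth estimate\n");
		else
			printf("plain Reno\n");
		return 0;
	}
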
+
/* Recalculate snd_ssthresh, we want to set it to:
*
* Reno:
*/
static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
{
- if (sysctl_tcp_bic) {
+ if (tcp_is_bic(tp)) {
if (sysctl_tcp_bic_fast_convergence &&
tp->snd_cwnd < tp->bictcp.last_max_cwnd)
tp->bictcp.last_max_cwnd
/* Stop taking Vegas samples for now. */
#define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0)
-
-/* Is this TCP connection using Vegas (regardless of whether it is taking
- * Vegas measurements at the current time)?
- */
-#define tcp_is_vegas(__tp) ((__tp)->vegas.do_vegas)
static inline void tcp_vegas_enable(struct tcp_opt *tp)
{
/* Should we be taking Vegas samples right now? */
#define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now)
-extern void tcp_vegas_init(struct tcp_opt *tp);
+extern void tcp_ca_init(struct tcp_opt *tp);
static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state)
{
static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
- if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
- tp->sacked_out = tp->packets_out - tp->lost_out;
- tp->left_out = tp->sacked_out + tp->lost_out;
+ if (tp->sack_ok &&
+ (tcp_get_pcount(&tp->sacked_out) >=
+ tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
+ tcp_set_pcount(&tp->sacked_out,
+ (tcp_get_pcount(&tp->packets_out) -
+ tcp_get_pcount(&tp->lost_out)));
+ tcp_set_pcount(&tp->left_out,
+ (tcp_get_pcount(&tp->sacked_out) +
+ tcp_get_pcount(&tp->lost_out)));
}
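
Worked numbers for the clamp above (illustrative, not from the patch): with ten packets out, four lost, and a stale SACK count of eight, sacked_out is clamped to six so that left_out never exceeds packets_out (the sack_ok guard is omitted here):

	#include <stdio.h>

	int main(void)
	{
		unsigned int packets_out = 10, lost_out = 4, sacked_out = 8;

		if (sacked_out >= packets_out - lost_out)
			sacked_out = packets_out - lost_out;	/* clamp to 6 */
		/* left_out = sacked_out + lost_out = 10 = packets_out */
		printf("left_out: %u\n", sacked_out + lost_out);
		return 0;
	}
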
extern void tcp_cwnd_application_limited(struct sock *sk);
static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
- if (tp->packets_out >= tp->snd_cwnd) {
+ __u32 packets_out = tcp_get_pcount(&tp->packets_out);
+
+ if (packets_out >= tp->snd_cwnd) {
/* Network is fed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
} else {
/* Network starves. */
- if (tp->packets_out > tp->snd_cwnd_used)
- tp->snd_cwnd_used = tp->packets_out;
+ if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
+ tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);
if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
tcp_cwnd_application_limited(sk);
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
((nonagle&TCP_NAGLE_CORK) ||
(!nonagle &&
- tp->packets_out &&
+ tcp_get_pcount(&tp->packets_out) &&
tcp_minshall_check(tp))));
}
+extern void tcp_set_skb_tso_segs(struct sk_buff *, unsigned int);
+
/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
* should be put on the wire right now.
*/
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
unsigned cur_mss, int nonagle)
{
+ int pkts = tcp_skb_pcount(skb);
+
+ if (!pkts) {
+ tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+ pkts = tcp_skb_pcount(skb);
+ }
+
/* RFC 1122 - section 4.2.3.4
*
* We must queue if
*/
return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
|| !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
- ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
+ (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
}
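
The (pkts-1) term above makes the congestion-window test count every segment of a TSO frame, not just the first. A worked example with assumed numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned int in_flight = 8, snd_cwnd = 10, pkts = 3;

		/* 8 + (3 - 1) = 10, which is not < 10: all three segments
		 * do not fit under cwnd, so the frame stays queued. */
		printf("%s\n", in_flight + (pkts - 1) < snd_cwnd ?
		       "send" : "queue");
		return 0;
	}
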
static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
- if (!tp->packets_out && !tp->pending)
+ if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
}
-/* Determine a window scaling and initial window to offer.
- * Based on the assumption that the given amount of space
- * will be offered. Store the results in the tp structure.
- * NOTE: for smooth operation initial space offering should
- * be a multiple of mss if possible. We assume here that mss >= 1.
- * This MUST be enforced by all callers.
- */
-static inline void tcp_select_initial_window(int __space, __u32 mss,
- __u32 *rcv_wnd,
- __u32 *window_clamp,
- int wscale_ok,
- __u8 *rcv_wscale)
-{
- unsigned int space = (__space < 0 ? 0 : __space);
-
- /* If no clamp set the clamp to the max possible scaled window */
- if (*window_clamp == 0)
- (*window_clamp) = (65535 << 14);
- space = min(*window_clamp, space);
-
- /* Quantize space offering to a multiple of mss if possible. */
- if (space > mss)
- space = (space / mss) * mss;
-
- /* NOTE: offering an initial window larger than 32767
- * will break some buggy TCP stacks. We try to be nice.
- * If we are not window scaling, then this truncates
- * our initial window offering to 32k. There should also
- * be a sysctl option to stop being nice.
- */
- (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
- (*rcv_wscale) = 0;
- if (wscale_ok) {
- /* See RFC1323 for an explanation of the limit to 14 */
- while (space > 65535 && (*rcv_wscale) < 14) {
- space >>= 1;
- (*rcv_wscale)++;
- }
- if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
- space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
- (*rcv_wscale)--;
-
- *rcv_wscale = max((__u8)sysctl_tcp_default_win_scale,
- *rcv_wscale);
- }
-
- /* Set initial window to value enough for senders,
- * following RFC2414. Senders, not following this RFC,
- * will be satisfied with 2.
- */
- if (mss > (1<<*rcv_wscale)) {
- int init_cwnd = 4;
- if (mss > 1460*3)
- init_cwnd = 2;
- else if (mss > 1460)
- init_cwnd = 3;
- if (*rcv_wnd > init_cwnd*mss)
- *rcv_wnd = init_cwnd*mss;
- }
- /* Set the clamp no higher than max representable value */
- (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
-}
+/* Determine a window scaling and initial window to offer. */
+extern void tcp_select_initial_window(int __space, __u32 mss,
+ __u32 *rcv_wnd, __u32 *window_clamp,
+ int wscale_ok, __u8 *rcv_wscale);
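
The helper now lives out of line (presumably in tcp_output.c). Its core scale computation, visible in the removed body above, shifts the offered space right until it fits the 16-bit window field, capping the shift at 14 per RFC 1323. A standalone rerun of that loop:

	#include <stdio.h>

	int main(void)
	{
		unsigned int space = 262144;	/* e.g. a 256 KB receive buffer */
		unsigned char wscale = 0;

		while (space > 65535 && wscale < 14) {
			space >>= 1;
			wscale++;
		}
		/* 262144 needs wscale 3: 262144 >> 3 == 32768 <= 65535 */
		printf("wscale=%u space=%u\n", (unsigned int)wscale, space);
		return 0;
	}
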
static inline int tcp_win_from_space(int space)
{
}
/* Note: caller must be prepared to deal with negative returns */
-static inline int tcp_space(struct sock *sk)
+static inline int tcp_space(const struct sock *sk)
{
return tcp_win_from_space(sk->sk_rcvbuf -
atomic_read(&sk->sk_rmem_alloc));
}
-static inline int tcp_full_space( struct sock *sk)
+static inline int tcp_full_space(const struct sock *sk)
{
return tcp_win_from_space(sk->sk_rcvbuf);
}
static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq)
{
- if (sysctl_tcp_westwood)
+ if (tcp_is_westwood(tp))
tp->westwood.rtt = rtt_seq;
}
static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
{
- if (sysctl_tcp_westwood)
+ if (tcp_is_westwood(tcp_sk(sk)))
__tcp_westwood_fast_bw(sk, skb);
}
static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
{
- if (sysctl_tcp_westwood)
+ if (tcp_is_westwood(tcp_sk(sk)))
__tcp_westwood_slow_bw(sk, skb);
}
static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
{
return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
- (__u32) (tp->mss_cache),
+ (__u32) (tp->mss_cache_std),
2U);
}
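
The helper above is the bandwidth-delay product expressed in segments, floored at two. A worked example with assumed units (bw_est times rtt_min already reduced to bytes; the real fields carry the kernel's internal units):

	#include <stdio.h>

	int main(void)
	{
		unsigned int bw_rtt = 144800;	/* bw_est * rtt_min, in bytes */
		unsigned int mss = 1448;	/* mss_cache_std */
		unsigned int ssthresh = bw_rtt / mss;	/* 100 segments */

		if (ssthresh < 2)	/* the 2U floor in the helper */
			ssthresh = 2;
		printf("ssthresh: %u\n", ssthresh);
		return 0;
	}
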
static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
{
- return sysctl_tcp_westwood ? __tcp_westwood_bw_rttmin(tp) : 0;
+ return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0;
}
static inline int tcp_westwood_ssthresh(struct tcp_opt *tp)
{
__u32 ssthresh = 0;
- if (sysctl_tcp_westwood) {
+ if (tcp_is_westwood(tp)) {
ssthresh = __tcp_westwood_bw_rttmin(tp);
if (ssthresh)
tp->snd_ssthresh = ssthresh;
{
__u32 cwnd = 0;
- if (sysctl_tcp_westwood) {
+ if (tcp_is_westwood(tp)) {
cwnd = __tcp_westwood_bw_rttmin(tp);
if (cwnd)
tp->snd_cwnd = cwnd;