upgrade to vserver 1.9.3.17

[linux-2.6.git] / include / net / tcp.h
diff --git a/include/net/tcp.h b/include/net/tcp.h

index b1513f5..b7591b7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -33,6 +33,7 @@
  #include <net/checksum.h>
  #include <net/sock.h>
  #include <net/snmp.h>
+#include <net/ip.h>
  #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  #include <linux/ipv6.h>
  #endif
@@ -152,16 +153,12 @@ extern struct tcp_hashinfo {
  #define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
  #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
  
-/* SLAB cache for TCP socks */
-extern kmem_cache_t *tcp_sk_cachep;
-
  extern kmem_cache_t *tcp_bucket_cachep;
  extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
                                                  unsigned short snum);
  extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
  extern void tcp_bucket_unlock(struct sock *sk);
  extern int tcp_port_rover;
-extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
  
  /* These are AF independent. */
  static __inline__ int tcp_bhashfn(__u16 lport)
@@ -275,20 +272,20 @@ static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
  
  #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
  
-static inline const u32 tcp_v4_rcv_saddr(const struct sock *sk)
+static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
  {
         return likely(sk->sk_state != TCP_TIME_WAIT) ?
                 inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
  }
  
  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline const struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
+static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
  {
         return likely(sk->sk_state != TCP_TIME_WAIT) ?
                 &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
  }
  
-static inline const struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
+static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
  {
         return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
  }
@@ -368,8 +365,8 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
  #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)    \
         (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports))    && \
          ((__sk)->sk_family             == AF_INET6)            && \
-        !ipv6_addr_cmp(&inet6_sk(__sk)->daddr, (__saddr))      && \
-        !ipv6_addr_cmp(&inet6_sk(__sk)->rcv_saddr, (__daddr))  && \
+        ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))     && \
+        ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
          (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
  
  /* These can have wildcards, don't try too hard. */
@@ -614,8 +611,8 @@ extern int sysctl_tcp_nometrics_save;
  extern int sysctl_tcp_bic;
  extern int sysctl_tcp_bic_fast_convergence;
  extern int sysctl_tcp_bic_low_window;
-extern int sysctl_tcp_default_win_scale;
  extern int sysctl_tcp_moderate_rcvbuf;
+extern int sysctl_tcp_tso_win_divisor;
  
  extern atomic_t tcp_memory_allocated;
  extern atomic_t tcp_sockets_allocated;
@@ -788,6 +785,8 @@ extern void                 tcp_shutdown (struct sock *sk, int how);
  
  extern int                     tcp_v4_rcv(struct sk_buff *skb);
  
+extern struct sock *           tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif);
+
  extern int                     tcp_v4_remember_stamp(struct sock *sk);
  
  extern int                     tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
@@ -955,6 +954,7 @@ extern int tcp_write_xmit(struct sock *, int nonagle);
  extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
  extern void tcp_xmit_retransmit_queue(struct sock *);
  extern void tcp_simple_retransmit(struct sock *);
+extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
  
  extern void tcp_send_probe0(struct sock *);
  extern void tcp_send_partial(struct sock *);
@@ -962,7 +962,6 @@ extern int  tcp_write_wakeup(struct sock *);
  extern void tcp_send_fin(struct sock *sk);
  extern void tcp_send_active_reset(struct sock *sk, int priority);
  extern int  tcp_send_synack(struct sock *);
-extern int  tcp_transmit_skb(struct sock *, struct sk_buff *);
  extern void tcp_push_one(struct sock *, unsigned mss_now);
  extern void tcp_send_ack(struct sock *sk);
  extern void tcp_send_delayed_ack(struct sock *sk);
@@ -972,11 +971,14 @@ extern void cleanup_rbuf(struct sock *sk, int copied);
  extern void tcp_init_xmit_timers(struct sock *);
  extern void tcp_clear_xmit_timers(struct sock *);
  
-extern void tcp_delete_keepalive_timer (struct sock *);
-extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
-extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
+extern void tcp_delete_keepalive_timer(struct sock *);
+extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
+extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
+extern unsigned int tcp_current_mss(struct sock *sk, int large);
  
-extern const char timer_bug_msg[];
+#ifdef TCP_DEBUG
+extern const char tcp_timer_bug_msg[];
+#endif
  
  /* tcp_diag.c */
  extern void tcp_get_info(struct sock *, struct tcp_info *);
@@ -1009,7 +1011,9 @@ static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
  #endif
                 break;
         default:
-               printk(timer_bug_msg);
+#ifdef TCP_DEBUG
+               printk(tcp_timer_bug_msg);
+#endif
                 return;
         };
  
@@ -1044,38 +1048,12 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
                 break;
  
         default:
-               printk(KERN_DEBUG "bug: unknown timer value\n");
+#ifdef TCP_DEBUG
+               printk(tcp_timer_bug_msg);
+#endif
         };
  }
  
-/* Compute the current effective MSS, taking SACKs and IP options,
- * and even PMTU discovery events into account.
- *
- * LARGESEND note: !urg_mode is overkill, only frames up to snd_up
- * cannot be large. However, taking into account rare use of URG, this
- * is not a big flaw.
- */
-
-static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
-{
-       struct tcp_opt *tp = tcp_sk(sk);
-       struct dst_entry *dst = __sk_dst_get(sk);
-       int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) &&
-                     !tp->urg_mode ?
-               tp->mss_cache : tp->mss_cache_std;
-
-       if (dst) {
-               u32 mtu = dst_pmtu(dst);
-               if (mtu != tp->pmtu_cookie ||
-                   tp->ext2_header_len != dst->header_len)
-                       mss_now = tcp_sync_mss(sk, mtu);
-       }
-       if (tp->eff_sacks)
-               mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-                           (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
-       return mss_now;
-}
-
  /* Initialize RCV_MSS value.
   * RCV_MSS is an our guess about MSS used by the peer.
   * We haven't any direct information about the MSS.
@@ -1121,7 +1099,7 @@ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
   * Rcv_nxt can be after the window if our peer push more data
   * than the offered window.
   */
-static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
+static __inline__ u32 tcp_receive_window(const struct tcp_opt *tp)
  {
         s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
  
@@ -1194,14 +1172,80 @@ struct tcp_skb_cb {
  
  #define TCP_SKB_CB(__skb)      ((struct tcp_skb_cb *)&((__skb)->cb[0]))
  
-#define for_retrans_queue(skb, sk, tp) \
-               for (skb = (sk)->sk_write_queue.next;                   \
-                    (skb != (tp)->send_head) &&                        \
-                    (skb != (struct sk_buff *)&(sk)->sk_write_queue);  \
-                    skb=skb->next)
+#include <net/tcp_ecn.h>
  
+/* Due to TSO, an SKB can be composed of multiple actual
+ * packets.  This returns that packet count (the SKB's tso_segs).
+ */
+static inline int tcp_skb_pcount(const struct sk_buff *skb)
+{
+       return skb_shinfo(skb)->tso_segs;
+}
  
-#include <net/tcp_ecn.h>
+/* This is valid iff tcp_skb_pcount() > 1. */
+static inline int tcp_skb_mss(const struct sk_buff *skb)
+{
+       return skb_shinfo(skb)->tso_size;
+}
+
+static inline void tcp_inc_pcount(tcp_pcount_t *count,
+                                 const struct sk_buff *skb)
+{
+       count->val += tcp_skb_pcount(skb);
+}
+
+static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+       count->val += amt;
+}
+
+static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+       count->val -= amt;
+}
+
+static inline void tcp_dec_pcount(tcp_pcount_t *count, 
+                                 const struct sk_buff *skb)
+{
+       count->val -= tcp_skb_pcount(skb);
+}
+
+static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
+                                        const struct sk_buff *skb)
+{
+       if (count->val) {
+               count->val -= tcp_skb_pcount(skb);
+               if ((int)count->val < 0)
+                       count->val = 0;
+       }
+}
+
+static inline __u32 tcp_get_pcount(const tcp_pcount_t *count)
+{
+       return count->val;
+}
+
+static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
+{
+       count->val = val;
+}
+
+static inline void tcp_packets_out_inc(struct sock *sk, 
+                                      struct tcp_opt *tp,
+                                      const struct sk_buff *skb)
+{
+       int orig = tcp_get_pcount(&tp->packets_out);
+
+       tcp_inc_pcount(&tp->packets_out, skb);
+       if (!orig)
+               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+}
+
+static inline void tcp_packets_out_dec(struct tcp_opt *tp, 
+                                      const struct sk_buff *skb)
+{
+       tcp_dec_pcount(&tp->packets_out, skb);
+}
  
  /* This determines how many packets are "in the network" to the best
   * of our knowledge.  In many cases it is conservative, but where
@@ -1217,11 +1261,20 @@ struct tcp_skb_cb {
   *     "Packets left network, but not honestly ACKed yet" PLUS
   *     "Packets fast retransmitted"
   */
-static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
+static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_opt *tp)
  {
-       return tp->packets_out - tp->left_out + tp->retrans_out;
+       return (tcp_get_pcount(&tp->packets_out) -
+               tcp_get_pcount(&tp->left_out) +
+               tcp_get_pcount(&tp->retrans_out));
  }
  
+/*
+ * Which congestion algorithm is in use on the connection.
+ */
+#define tcp_is_vegas(__tp)     ((__tp)->adv_cong == TCP_VEGAS)
+#define tcp_is_westwood(__tp)  ((__tp)->adv_cong == TCP_WESTWOOD)
+#define tcp_is_bic(__tp)       ((__tp)->adv_cong == TCP_BIC)
+
  /* Recalculate snd_ssthresh, we want to set it to:
   *
   * Reno:
@@ -1234,7 +1287,7 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
   */
  static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
  {
-       if (sysctl_tcp_bic) {
+       if (tcp_is_bic(tp)) {
                 if (sysctl_tcp_bic_fast_convergence &&
                     tp->snd_cwnd < tp->bictcp.last_max_cwnd)
                         tp->bictcp.last_max_cwnd
@@ -1253,11 +1306,6 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
  
  /* Stop taking Vegas samples for now. */
  #define tcp_vegas_disable(__tp)        ((__tp)->vegas.doing_vegas_now = 0)
-
-/* Is this TCP connection using Vegas (regardless of whether it is taking
- * Vegas measurements at the current time)?
- */
-#define tcp_is_vegas(__tp)     ((__tp)->vegas.do_vegas)
      
  static inline void tcp_vegas_enable(struct tcp_opt *tp)
  {
@@ -1291,7 +1339,7 @@ static inline void tcp_vegas_enable(struct tcp_opt *tp)
  /* Should we be taking Vegas samples right now? */
  #define tcp_vegas_enabled(__tp)        ((__tp)->vegas.doing_vegas_now)
  
-extern void tcp_vegas_init(struct tcp_opt *tp);
+extern void tcp_ca_init(struct tcp_opt *tp);
  
  static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state)
  {
@@ -1320,9 +1368,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
  
  static inline void tcp_sync_left_out(struct tcp_opt *tp)
  {
-       if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
-               tp->sacked_out = tp->packets_out - tp->lost_out;
-       tp->left_out = tp->sacked_out + tp->lost_out;
+       if (tp->sack_ok &&
+           (tcp_get_pcount(&tp->sacked_out) >=
+            tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
+               tcp_set_pcount(&tp->sacked_out,
+                              (tcp_get_pcount(&tp->packets_out) -
+                               tcp_get_pcount(&tp->lost_out)));
+       tcp_set_pcount(&tp->left_out,
+                      (tcp_get_pcount(&tp->sacked_out) +
+                       tcp_get_pcount(&tp->lost_out)));
  }
  
  extern void tcp_cwnd_application_limited(struct sock *sk);
@@ -1331,14 +1385,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
  
  static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
  {
-       if (tp->packets_out >= tp->snd_cwnd) {
+       __u32 packets_out = tcp_get_pcount(&tp->packets_out);
+
+       if (packets_out >= tp->snd_cwnd) {
                 /* Network is feed fully. */
                 tp->snd_cwnd_used = 0;
                 tp->snd_cwnd_stamp = tcp_time_stamp;
         } else {
                 /* Network starves. */
-               if (tp->packets_out > tp->snd_cwnd_used)
-                       tp->snd_cwnd_used = tp->packets_out;
+               if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
+                       tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);
  
                 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
                         tcp_cwnd_application_limited(sk);
@@ -1372,18 +1428,19 @@ extern __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst);
  /* Slow start with delack produces 3 packets of burst, so that
   * it is safe "de facto".
   */
-static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
+static __inline__ __u32 tcp_max_burst(const struct tcp_opt *tp)
  {
         return 3;
  }
  
-static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
+static __inline__ int tcp_minshall_check(const struct tcp_opt *tp)
  {
         return after(tp->snd_sml,tp->snd_una) &&
                 !after(tp->snd_sml, tp->snd_nxt);
  }
  
-static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
+static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, 
+                                          const struct sk_buff *skb)
  {
         if (skb->len < mss)
                 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
@@ -1399,22 +1456,33 @@ static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct s
   */
  
  static __inline__ int
-tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
+tcp_nagle_check(const struct tcp_opt *tp, const struct sk_buff *skb, 
+               unsigned mss_now, int nonagle)
  {
         return (skb->len < mss_now &&
                 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
                 ((nonagle&TCP_NAGLE_CORK) ||
                  (!nonagle &&
-                 tp->packets_out &&
+                 tcp_get_pcount(&tp->packets_out) &&
                   tcp_minshall_check(tp))));
  }
  
-/* This checks if the data bearing packet SKB (usually tp->send_head)
+extern void tcp_set_skb_tso_segs(struct sk_buff *, unsigned int);
+
+/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
   * should be put on the wire right now.
   */
-static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
+static __inline__ int tcp_snd_test(const struct tcp_opt *tp, 
+                                  struct sk_buff *skb,
                                    unsigned cur_mss, int nonagle)
  {
+       int pkts = tcp_skb_pcount(skb);
+
+       if (!pkts) {
+               tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
+               pkts = tcp_skb_pcount(skb);
+       }
+
         /*      RFC 1122 - section 4.2.3.4
          *
          *      We must queue if
@@ -1441,18 +1509,19 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
          */
         return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
                  || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
-               ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
+               (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
                  (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
                 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
  }
  
  static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
  {
-       if (!tp->packets_out && !tp->pending)
+       if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
                 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
  }
  
-static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
+static __inline__ int tcp_skb_is_last(const struct sock *sk, 
+                                     const struct sk_buff *skb)
  {
         return skb->next == (struct sk_buff *)&sk->sk_write_queue;
  }
@@ -1466,7 +1535,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk,
                                                  unsigned cur_mss,
                                                  int nonagle)
  {
-       struct sk_buff *skb = tp->send_head;
+       struct sk_buff *skb = sk->sk_send_head;
  
         if (skb) {
                 if (!tcp_skb_is_last(sk, skb))
@@ -1486,7 +1555,7 @@ static __inline__ void tcp_push_pending_frames(struct sock *sk,
  
  static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
  {
-       struct sk_buff *skb = tp->send_head;
+       struct sk_buff *skb = sk->sk_send_head;
  
         return (skb &&
                 tcp_snd_test(tp, skb, tcp_current_mss(sk, 1),
@@ -1503,7 +1572,7 @@ static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
         tp->snd_wl1 = seq;
  }
  
-extern void                    tcp_destroy_sock(struct sock *sk);
+extern void tcp_destroy_sock(struct sock *sk);
  
  
  /*
@@ -1559,7 +1628,7 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
  
                         while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
                                 sk->sk_backlog_rcv(sk, skb1);
-                               NET_INC_STATS_BH(TCPPrequeueDropped);
+                               NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
                         }
  
                         tp->ucopy.memory = 0;
@@ -1577,7 +1646,7 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
  #undef STATE_TRACE
  
  #ifdef STATE_TRACE
-static char *statename[]={
+static const char *statename[]={
         "Unused","Established","Syn Sent","Syn Recv",
         "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
         "Close Wait","Last ACK","Listen","Closing"
@@ -1591,12 +1660,12 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
         switch (state) {
         case TCP_ESTABLISHED:
                 if (oldstate != TCP_ESTABLISHED)
-                       TCP_INC_STATS(TcpCurrEstab);
+                       TCP_INC_STATS(TCP_MIB_CURRESTAB);
                 break;
  
         case TCP_CLOSE:
                 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
-                       TCP_INC_STATS(TcpEstabResets);
+                       TCP_INC_STATS(TCP_MIB_ESTABRESETS);
  
                 sk->sk_prot->unhash(sk);
                 if (tcp_sk(sk)->bind_hash &&
@@ -1605,7 +1674,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
                 /* fall through */
         default:
                 if (oldstate==TCP_ESTABLISHED)
-                       TCP_DEC_STATS(TcpCurrEstab);
+                       TCP_DEC_STATS(TCP_MIB_CURRESTAB);
         }
  
         /* Change state AFTER socket is unhashed to avoid closed
@@ -1706,68 +1775,10 @@ static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
  }
  
-/* Determine a window scaling and initial window to offer.
- * Based on the assumption that the given amount of space
- * will be offered. Store the results in the tp structure.
- * NOTE: for smooth operation initial space offering should
- * be a multiple of mss if possible. We assume here that mss >= 1.
- * This MUST be enforced by all callers.
- */
-static inline void tcp_select_initial_window(int __space, __u32 mss,
-       __u32 *rcv_wnd,
-       __u32 *window_clamp,
-       int wscale_ok,
-       __u8 *rcv_wscale)
-{
-       unsigned int space = (__space < 0 ? 0 : __space);
-
-       /* If no clamp set the clamp to the max possible scaled window */
-       if (*window_clamp == 0)
-               (*window_clamp) = (65535 << 14);
-       space = min(*window_clamp, space);
-
-       /* Quantize space offering to a multiple of mss if possible. */
-       if (space > mss)
-               space = (space / mss) * mss;
-
-       /* NOTE: offering an initial window larger than 32767
-        * will break some buggy TCP stacks. We try to be nice.
-        * If we are not window scaling, then this truncates
-        * our initial window offering to 32k. There should also
-        * be a sysctl option to stop being nice.
-        */
-       (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
-       (*rcv_wscale) = 0;
-       if (wscale_ok) {
-               /* See RFC1323 for an explanation of the limit to 14 */
-               while (space > 65535 && (*rcv_wscale) < 14) {
-                       space >>= 1;
-                       (*rcv_wscale)++;
-               }
-               if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
-                   space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
-                       (*rcv_wscale)--;
-
-               *rcv_wscale = max((__u8)sysctl_tcp_default_win_scale,
-                                 *rcv_wscale);
-       }
-
-       /* Set initial window to value enough for senders,
-        * following RFC1414. Senders, not following this RFC,
-        * will be satisfied with 2.
-        */
-       if (mss > (1<<*rcv_wscale)) {
-               int init_cwnd = 4;
-               if (mss > 1460*3)
-                       init_cwnd = 2;
-               else if (mss > 1460)
-                       init_cwnd = 3;
-               if (*rcv_wnd > init_cwnd*mss)
-                       *rcv_wnd = init_cwnd*mss;
-       }
-       /* Set the clamp no higher than max representable value */
-       (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
-}
+/* Determine a window scaling and initial window to offer. */
+extern void tcp_select_initial_window(int __space, __u32 mss,
+                                     __u32 *rcv_wnd, __u32 *window_clamp,
+                                     int wscale_ok, __u8 *rcv_wscale);
  
  static inline int tcp_win_from_space(int space)
  {
@@ -1777,13 +1788,13 @@ static inline int tcp_win_from_space(int space)
  }
  
  /* Note: caller must be prepared to deal with negative returns */ 
-static inline int tcp_space(struct sock *sk)
+static inline int tcp_space(const struct sock *sk)
  {
         return tcp_win_from_space(sk->sk_rcvbuf -
                                   atomic_read(&sk->sk_rmem_alloc));
  } 
  
-static inline int tcp_full_space( struct sock *sk)
+static inline int tcp_full_space(const struct sock *sk)
  {
         return tcp_win_from_space(sk->sk_rcvbuf); 
  }
@@ -1987,96 +1998,7 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
         req->rmt_port = skb->h.th->source;
  }
  
-#define TCP_MEM_QUANTUM        ((int)PAGE_SIZE)
-
-static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
-{
-       tcp_sk(sk)->queue_shrunk = 1;
-       sk->sk_wmem_queued -= skb->truesize;
-       sk->sk_forward_alloc += skb->truesize;
-       __kfree_skb(skb);
-}
-
-extern void __tcp_mem_reclaim(struct sock *sk);
-extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
-
-static inline void tcp_mem_reclaim(struct sock *sk)
-{
-       if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM)
-               __tcp_mem_reclaim(sk);
-}
-
-static inline void tcp_enter_memory_pressure(void)
-{
-       if (!tcp_memory_pressure) {
-               NET_INC_STATS(TCPMemoryPressures);
-               tcp_memory_pressure = 1;
-       }
-}
-
-static inline void tcp_moderate_sndbuf(struct sock *sk)
-{
-       if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
-               sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued / 2);
-               sk->sk_sndbuf = max(sk->sk_sndbuf, SOCK_MIN_SNDBUF);
-       }
-}
-
-static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
-{
-       struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp);
-
-       if (skb) {
-               skb->truesize += mem;
-               if (sk->sk_forward_alloc >= (int)skb->truesize ||
-                   tcp_mem_schedule(sk, skb->truesize, 0)) {
-                       skb_reserve(skb, MAX_TCP_HEADER);
-                       return skb;
-               }
-               __kfree_skb(skb);
-       } else {
-               tcp_enter_memory_pressure();
-               tcp_moderate_sndbuf(sk);
-       }
-       return NULL;
-}
-
-static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
-{
-       return tcp_alloc_pskb(sk, size, 0, gfp);
-}
-
-static inline struct page * tcp_alloc_page(struct sock *sk)
-{
-       if (sk->sk_forward_alloc >= (int)PAGE_SIZE ||
-           tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
-               struct page *page = alloc_pages(sk->sk_allocation, 0);
-               if (page)
-                       return page;
-       }
-       tcp_enter_memory_pressure();
-       tcp_moderate_sndbuf(sk);
-       return NULL;
-}
-
-static inline void tcp_writequeue_purge(struct sock *sk)
-{
-       struct sk_buff *skb;
-
-       while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
-               tcp_free_skb(sk, skb);
-       tcp_mem_reclaim(sk);
-}
-
-extern void tcp_rfree(struct sk_buff *skb);
-
-static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
-       skb->sk = sk;
-       skb->destructor = tcp_rfree;
-       atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-       sk->sk_forward_alloc -= skb->truesize;
-}
+extern void tcp_enter_memory_pressure(void);
  
  extern void tcp_listen_wlock(void);
  
@@ -2099,17 +2021,17 @@ static inline void tcp_listen_unlock(void)
                 wake_up(&tcp_lhash_wait);
  }
  
-static inline int keepalive_intvl_when(struct tcp_opt *tp)
+static inline int keepalive_intvl_when(const struct tcp_opt *tp)
  {
         return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
  }
  
-static inline int keepalive_time_when(struct tcp_opt *tp)
+static inline int keepalive_time_when(const struct tcp_opt *tp)
  {
         return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
  }
  
-static inline int tcp_fin_time(struct tcp_opt *tp)
+static inline int tcp_fin_time(const struct tcp_opt *tp)
  {
         int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
  
@@ -2119,7 +2041,7 @@ static inline int tcp_fin_time(struct tcp_opt *tp)
         return fin_timeout;
  }
  
-static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
+static inline int tcp_paws_check(const struct tcp_opt *tp, int rst)
  {
         if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
                 return 0;
@@ -2162,18 +2084,18 @@ static inline int tcp_use_frto(const struct sock *sk)
          * unsent new data, and the advertised window should allow
          * sending it.
          */
-       return (sysctl_tcp_frto && tp->send_head &&
-               !after(TCP_SKB_CB(tp->send_head)->end_seq,
+       return (sysctl_tcp_frto && sk->sk_send_head &&
+               !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
                        tp->snd_una + tp->snd_wnd));
  }
  
  static inline void tcp_mib_init(void)
  {
         /* See RFC 2012 */
-       TCP_ADD_STATS_USER(TcpRtoAlgorithm, 1);
-       TCP_ADD_STATS_USER(TcpRtoMin, TCP_RTO_MIN*1000/HZ);
-       TCP_ADD_STATS_USER(TcpRtoMax, TCP_RTO_MAX*1000/HZ);
-       TCP_ADD_STATS_USER(TcpMaxConn, -1);
+       TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
+       TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
+       TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
+       TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
  }
  
  /* /proc */
@@ -2210,7 +2132,7 @@ extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
  
  static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq)
  {
-        if (sysctl_tcp_westwood)
+        if (tcp_is_westwood(tp))
                  tp->westwood.rtt = rtt_seq;
  }
  
@@ -2219,33 +2141,33 @@ void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *);
  
  static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb)
  {
-        if (sysctl_tcp_westwood)
+        if (tcp_is_westwood(tcp_sk(sk)))
                  __tcp_westwood_fast_bw(sk, skb);
  }
  
  static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
  {
-        if (sysctl_tcp_westwood)
+        if (tcp_is_westwood(tcp_sk(sk)))
                  __tcp_westwood_slow_bw(sk, skb);
  }
  
  static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
  {
          return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
-                  (__u32) (tp->mss_cache),
+                  (__u32) (tp->mss_cache_std),
                    2U);
  }
  
  static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
  {
-       return sysctl_tcp_westwood ? __tcp_westwood_bw_rttmin(tp) : 0;
+       return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0;
  }
  
  static inline int tcp_westwood_ssthresh(struct tcp_opt *tp)
  {
         __u32 ssthresh = 0;
  
-       if (sysctl_tcp_westwood) {
+       if (tcp_is_westwood(tp)) {
                 ssthresh = __tcp_westwood_bw_rttmin(tp);
                 if (ssthresh)
                         tp->snd_ssthresh = ssthresh;  
@@ -2258,7 +2180,7 @@ static inline int tcp_westwood_cwnd(struct tcp_opt *tp)
  {
         __u32 cwnd = 0;
  
-       if (sysctl_tcp_westwood) {
+       if (tcp_is_westwood(tp)) {
                 cwnd = __tcp_westwood_bw_rttmin(tp);
                 if (cwnd)
                         tp->snd_cwnd = cwnd;