fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / tcp_input.c
index 39dca51..c26076f 100644 (file)
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
-
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
-
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
+
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
+
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
 
 #define FLAG_DATA              0x01 /* Incoming frame contained data.          */
 #define FLAG_WIN_UPDATE                0x02 /* Incoming ACK was a window update.       */
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
        /* skb->len may jitter because of SACKs, even if peer
         * sends good full-sized frames.
         */
-       len = skb->len;
+       len = skb_shinfo(skb)->gso_size ?: skb->len;
        if (len >= icsk->icsk_ack.rcv_mss) {
                icsk->icsk_ack.rcv_mss = len;
        } else {
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
                                return;
                        }
                }
+               if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+                       icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
                icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
        }
 }
@@ -933,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
-       struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+       struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
        int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
        int reord = tp->packets_out;
        int prior_fackets;
@@ -1009,10 +1011,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                        for (j = 0; j < i; j++){
                                if (after(ntohl(sp[j].start_seq),
                                          ntohl(sp[j+1].start_seq))){
-                                       sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
-                                       sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
-                                       sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
-                                       sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+                                       struct tcp_sack_block_wire tmp;
+
+                                       tmp = sp[j];
+                                       sp[j] = sp[j+1];
+                                       sp[j+1] = tmp;
                                }
 
                        }
@@ -2257,7 +2260,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
        u32 pkts_acked = 0;
        void (*rtt_sample)(struct sock *sk, u32 usrtt)
                = icsk->icsk_ca_ops->rtt_sample;
-       struct timeval tv;
+       struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
        while ((skb = skb_peek(&sk->sk_write_queue)) &&
               skb != sk->sk_send_head) {
@@ -2627,7 +2630,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
                                switch(opcode) {
                                case TCPOPT_MSS:
                                        if(opsize==TCPOLEN_MSS && th->syn && !estab) {
-                                               u16 in_mss = ntohs(get_unaligned((__u16 *)ptr));
+                                               u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
                                                if (in_mss) {
                                                        if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
                                                                in_mss = opt_rx->user_mss;
@@ -2655,8 +2658,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
                                                if ((estab && opt_rx->tstamp_ok) ||
                                                    (!estab && sysctl_tcp_timestamps)) {
                                                        opt_rx->saw_tstamp = 1;
-                                                       opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr));
-                                                       opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4)));
+                                                       opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+                                                       opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
                                                }
                                        }
                                        break;
@@ -2675,6 +2678,14 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
                                           opt_rx->sack_ok) {
                                                TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
                                        }
+#ifdef CONFIG_TCP_MD5SIG
+                               case TCPOPT_MD5SIG:
+                                       /*
+                                        * The MD5 Hash has already been
+                                        * checked (see tcp_v{4,6}_do_rcv()).
+                                        */
+                                       break;
+#endif
                                };
                                ptr+=opsize-2;
                                length-=opsize;
@@ -2693,8 +2704,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
                return 0;
        } else if (tp->rx_opt.tstamp_ok &&
                   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
-               __u32 *ptr = (__u32 *)(th + 1);
-               if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+               __be32 *ptr = (__be32 *)(th + 1);
+               if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
                                  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
                        tp->rx_opt.saw_tstamp = 1;
                        ++ptr;
@@ -3534,7 +3545,6 @@ static int tcp_prune_queue(struct sock *sk)
        return -1;
 }
 
-EXPORT_SYMBOL_GPL(tcp_cwnd_application_limited);
 
 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
  * As additional protections, we do not touch cwnd in retransmission phases,
@@ -3781,9 +3791,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
        return err;
 }
 
-static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
-       int result;
+       __sum16 result;
 
        if (sock_owned_by_user(sk)) {
                local_bh_enable();
@@ -3910,10 +3920,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
                /* Check timestamp */
                if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-                       __u32 *ptr = (__u32 *)(th + 1);
+                       __be32 *ptr = (__be32 *)(th + 1);
 
                        /* No? Slow path! */
-                       if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+                       if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
                                          | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
                                goto slow_path;
 
@@ -4226,9 +4236,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                 * Change state from SYN-SENT only after copied_seq
                 * is initialized. */
                tp->copied_seq = tp->rcv_nxt;
-               mb();
+               smp_mb();
                tcp_set_state(sk, TCP_ESTABLISHED);
 
+               security_inet_conn_established(sk, skb);
+
                /* Make sure socket is routed, for correct metrics.  */
                icsk->icsk_af_ops->rebuild_header(sk);
 
@@ -4409,9 +4421,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                         * But, this leaves one open to an easy denial of
                         * service attack, and SYN cookies can't defend
                         * against this problem. So, we drop the data
-                        * in the interest of security over speed.
+                        * in the interest of security over speed unless
+                        * it's still in use.
                         */
-                       goto discard;
+                       kfree_skb(skb);
+                       return 0;
                }
                goto discard;
 
@@ -4472,7 +4486,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                case TCP_SYN_RECV:
                        if (acceptable) {
                                tp->copied_seq = tp->rcv_nxt;
-                               mb();
+                               smp_mb();
                                tcp_set_state(sk, TCP_ESTABLISHED);
                                sk->sk_state_change(sk);