linux 2.6.16.38 w/ vs2.0.3-rc1

[linux-2.6.git] / net / ipv4 / netfilter / ip_conntrack_proto_tcp.c
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c

index 324ab5d..e0dc370 100644 (file)
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,11 +32,9 @@
  
  #include <net/tcp.h>
  
-#include <linux/netfilter.h>
  #include <linux/netfilter_ipv4.h>
  #include <linux/netfilter_ipv4/ip_conntrack.h>
  #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
  
  #if 0
  #define DEBUGP printk
@@ -46,7 +44,7 @@
  #endif
  
  /* Protects conntrack->proto.tcp */
-static DECLARE_RWLOCK(tcp_lock);
+static DEFINE_RWLOCK(tcp_lock);
  
  /* "Be conservative in what you do, 
      be liberal in what you accept from others." 
@@ -86,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
  #define HOURS * 60 MINS
  #define DAYS * 24 HOURS
  
-unsigned long ip_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned long ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned long ip_ct_tcp_timeout_established =   5 DAYS;
-unsigned long ip_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned long ip_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned long ip_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned long ip_ct_tcp_timeout_close =        10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent =      2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv =     60 SECS;
+unsigned int ip_ct_tcp_timeout_established =   5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait =      2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait =   60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack =     30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait =     2 MINS;
+unsigned int ip_ct_tcp_timeout_close =        10 SECS;
  
  /* RFC1122 says the R2 limit should be at least 100 seconds.
     Linux uses 15 packets as limit, which corresponds 
     to ~13-30min depending on RTO. */
-unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
   
-static unsigned long * tcp_timeouts[]
+static const unsigned int * tcp_timeouts[]
  = { NULL,                              /*      TCP_CONNTRACK_NONE */
      &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
      &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -171,7 +169,7 @@ enum tcp_bit_set {
   *     if they are invalid
   *     or we do not support the request (simultaneous open)
   */
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
         {
  /* ORIGINAL */
  /*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
@@ -254,7 +252,7 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
   *     sSS -> sSR      Standard open.
   *     sSR -> sSR      Retransmitted SYN/ACK.
   *     sES -> sIG      Late retransmitted SYN/ACK?
- *     sFW -> sIG
+ *     sFW -> sIG      Might be SYN/ACK answering ignored SYN
   *     sCW -> sIG
   *     sLA -> sIG
   *     sTW -> sIG
@@ -273,10 +271,10 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
   *     sCL -> sCL
   */
  /*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
-/*ack*/           { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/           { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
  /*
   *     sSS -> sIG      Might be a half-open connection.
- *     sSR -> sIV      Simultaneous open.
+ *     sSR -> sSR      Might answer late resent SYN.
   *     sES -> sES      :-)
   *     sFW -> sCW      Normal close request answered by ACK.
   *     sCW -> sCW
@@ -330,13 +328,66 @@ static int tcp_print_conntrack(struct seq_file *s,
  {
         enum tcp_conntrack state;
  
-       READ_LOCK(&tcp_lock);
+       read_lock_bh(&tcp_lock);
         state = conntrack->proto.tcp.state;
-       READ_UNLOCK(&tcp_lock);
+       read_unlock_bh(&tcp_lock);
  
         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
  }
  
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+                        const struct ip_conntrack *ct)
+{
+       struct nfattr *nest_parms;
+       
+       read_lock_bh(&tcp_lock);
+       nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+       NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
+               &ct->proto.tcp.state);
+       read_unlock_bh(&tcp_lock);
+
+       NFA_NEST_END(skb, nest_parms);
+
+       return 0;
+
+nfattr_failure:
+       read_unlock_bh(&tcp_lock);
+       return -1;
+}
+
+static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
+       [CTA_PROTOINFO_TCP_STATE-1]     = sizeof(u_int8_t),
+};
+
+static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
+{
+       struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
+       struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+
+       /* updates may carry no private protocol info at all;
+        * in that case skip the parsing */
+       if (!attr)
+               return 0;
+
+        nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
+
+       if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
+               return -EINVAL;
+
+       if (!tb[CTA_PROTOINFO_TCP_STATE-1])
+               return -EINVAL;
+
+       write_lock_bh(&tcp_lock);
+       ct->proto.tcp.state = 
+               *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+       write_unlock_bh(&tcp_lock);
+
+       return 0;
+}
+#endif
+
  static unsigned int get_conntrack_index(const struct tcphdr *tcph)
  {
         if (tcph->rst) return TCP_RST_SET;
@@ -352,14 +403,19 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
     http://www.nluug.nl/events/sane2000/papers.html
     http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
     
-   The boundaries and the conditions are slightly changed:
-   
+   The boundaries and the conditions are changed according to RFC793:
+   the packet must intersect the window (i.e. segments may start
+   before the left edge or extend past the right edge) and thus
+   receivers may ACK segments after the right edge of the window.
+
         td_maxend = max(sack + max(win,1)) seen in reply packets
         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+       td_maxwin += seq + len - sender.td_maxend
+                       if seq + len > sender.td_maxend
         td_end    = max(seq + len) seen in sent packets
     
-   I.  Upper bound for valid data:     seq + len <= sender.td_maxend
-   II.         Lower bound for valid data:     seq >= sender.td_end - receiver.td_maxwin
+   I.   Upper bound for valid data:    seq <= sender.td_maxend
+   II.  Lower bound for valid data:    seq + len >= sender.td_end - receiver.td_maxwin
     III.        Upper bound for valid ack:      sack <= receiver.td_end
     IV. Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
         
@@ -373,7 +429,7 @@ static inline __u32 segment_seq_plus_len(__u32 seq,
                                          size_t len,
                                          struct iphdr *iph,
                                          struct tcphdr *tcph)
-  {
+{
         return (seq + len - (iph->ihl + tcph->doff)*4
                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
  }
@@ -444,22 +500,33 @@ static void tcp_options(const struct sk_buff *skb,
         }
  }
  
-static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
+static void tcp_sack(const struct sk_buff *skb,
+                    struct iphdr *iph,
+                    struct tcphdr *tcph,
+                    __u32 *sack)
  {
-       __u32 tmp;
+       unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
         unsigned char *ptr;
         int length = (tcph->doff*4) - sizeof(struct tcphdr);
-       
+       __u32 tmp;
+
+       if (!length)
+               return;
+
+       ptr = skb_header_pointer(skb,
+                                (iph->ihl * 4) + sizeof(struct tcphdr),
+                                length, buff);
+       BUG_ON(ptr == NULL);
+
         /* Fast path for timestamp-only option */
         if (length == TCPOLEN_TSTAMP_ALIGNED*4
-           && *(__u32 *)(tcph + 1) ==
+           && *(__u32 *)ptr ==
                 __constant_ntohl((TCPOPT_NOP << 24) 
                                  | (TCPOPT_NOP << 16)
                                  | (TCPOPT_TIMESTAMP << 8)
                                  | TCPOLEN_TIMESTAMP))
                 return;
                 
-       ptr = (unsigned char *)(tcph + 1);
         while (length > 0) {
                 int opcode=*ptr++;
                 int opsize, i;
@@ -500,7 +567,7 @@ static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
  
  static int tcp_in_window(struct ip_ct_tcp *state, 
                           enum ip_conntrack_dir dir,
-                         unsigned int *index,
+                         unsigned int index,
                           const struct sk_buff *skb,
                           struct iphdr *iph,
                           struct tcphdr *tcph)
@@ -519,7 +586,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
         end = segment_seq_plus_len(seq, skb->len, iph, tcph);
         
         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
-               tcp_sack(tcph, &sack);
+               tcp_sack(skb, iph, tcph, &sack);
                 
         DEBUGP("tcp_in_window: START\n");
         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
@@ -598,20 +665,23 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 ack = sack = receiver->td_end;
         }
  
-       if (seq == end)
+       if (seq == end
+           && (!tcph->rst 
+               || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
                 /*
                  * Packets contains no data: we assume it is valid
                  * and check the ack value only.
+                * However RST segments are always validated by their
+                * SEQ number, except when seq == 0 (reset sent answering
+                * SYN).
                  */
                 seq = end = sender->td_end;
                 
         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-              "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
+              "seq=%u ack=%u sack =%u win=%u end=%u\n",
                 NIPQUAD(iph->saddr), ntohs(tcph->source),
                 NIPQUAD(iph->daddr), ntohs(tcph->dest),
-               seq, ack, sack, win, end, 
-               after(end, sender->td_maxend) && before(seq, sender->td_maxend)
-               ? sender->td_maxend : end);
+               seq, ack, sack, win, end);
         DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -619,24 +689,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
                 receiver->td_scale);
         
-       /* Ignore data over the right edge of the receiver's window. */
-       if (after(end, sender->td_maxend) &&
-           before(seq, sender->td_maxend)) {
-               end = sender->td_maxend;
-               if (*index == TCP_FIN_SET)
-                       *index = TCP_ACK_SET;
-       }
         DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-               before(end, sender->td_maxend + 1) 
-                   || before(seq, sender->td_maxend + 1),
-               after(seq, sender->td_end - receiver->td_maxwin - 1) 
-                   || after(end, sender->td_end - receiver->td_maxwin - 1),
+               before(seq, sender->td_maxend + 1),
+               after(end, sender->td_end - receiver->td_maxwin - 1),
                 before(sack, receiver->td_end + 1),
                 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
         
         if (sender->loose || receiver->loose ||
-           (before(end, sender->td_maxend + 1) &&
-            after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+           (before(seq, sender->td_maxend + 1) &&
+            after(end, sender->td_end - receiver->td_maxwin - 1) &&
              before(sack, receiver->td_end + 1) &&
              after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
                 /*
@@ -653,6 +714,11 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                         sender->td_maxwin = swin;
                 if (after(end, sender->td_end))
                         sender->td_end = end;
+               /*
+                * Update receiver data.
+                */
+               if (after(end, sender->td_maxend))
+                       receiver->td_maxwin += end - sender->td_maxend;
                 if (after(sack + win, receiver->td_maxend - 1)) {
                         receiver->td_maxend = sack + win;
                         if (win == 0)
@@ -662,14 +728,16 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 /* 
                  * Check retransmissions.
                  */
-               if (*index == TCP_ACK_SET) {
+               if (index == TCP_ACK_SET) {
                         if (state->last_dir == dir
                             && state->last_seq == seq
+                           && state->last_ack == ack
                             && state->last_end == end)
                                 state->retrans++;
                         else {
                                 state->last_dir = dir;
                                 state->last_seq = seq;
+                               state->last_ack = ack;
                                 state->last_end = end;
                                 state->retrans = 0;
                         }
@@ -683,18 +751,18 @@ static int tcp_in_window(struct ip_ct_tcp *state,
                 res = 1;
         } else {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                         "ip_ct_tcp: %s ",
-                       before(end, sender->td_maxend + 1) ?
-                       after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+                       before(seq, sender->td_maxend + 1) ?
+                       after(end, sender->td_end - receiver->td_maxwin - 1) ?
                         before(sack, receiver->td_end + 1) ?
                         after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
-                       : "ACK is under the lower bound (possibly overly delayed ACK)"
-                       : "ACK is over the upper bound (ACKed data has never seen yet)"
-                       : "SEQ is under the lower bound (retransmitted already ACKed data)"
+                       : "ACK is under the lower bound (possible overly delayed ACK)"
+                       : "ACK is over the upper bound (ACKed data not seen yet)"
+                       : "SEQ is under the lower bound (already ACKed data retransmitted)"
                         : "SEQ is over the upper bound (over the window of the receiver)");
  
-               res = ip_ct_tcp_be_liberal && !tcph->rst;
+               res = ip_ct_tcp_be_liberal;
         }
    
         DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -707,9 +775,9 @@ static int tcp_in_window(struct ip_ct_tcp *state,
  
  #ifdef CONFIG_IP_NF_NAT_NEEDED
  /* Update sender->td_end after NAT successfully mangled the packet */
-int ip_conntrack_tcp_update(struct sk_buff *skb,
-                           struct ip_conntrack *conntrack, 
-                           int dir)
+void ip_conntrack_tcp_update(struct sk_buff *skb,
+                            struct ip_conntrack *conntrack, 
+                            enum ip_conntrack_dir dir)
  {
         struct iphdr *iph = skb->nh.iph;
         struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
@@ -721,22 +789,20 @@ int ip_conntrack_tcp_update(struct sk_buff *skb,
  
         end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
         
-       WRITE_LOCK(&tcp_lock);
+       write_lock_bh(&tcp_lock);
         /*
          * We have to worry for the ack in the reply packet only...
          */
         if (after(end, conntrack->proto.tcp.seen[dir].td_end))
                 conntrack->proto.tcp.seen[dir].td_end = end;
         conntrack->proto.tcp.last_end = end;
-       WRITE_UNLOCK(&tcp_lock);
+       write_unlock_bh(&tcp_lock);
         DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
                 sender->td_scale, 
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
                 receiver->td_scale);
-               
-       return 1;
  }
   
  #endif
@@ -751,10 +817,12 @@ int ip_conntrack_tcp_update(struct sk_buff *skb,
  #define        TH_CWR  0x80
  
  /* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
  {
         [TH_SYN]                        = 1,
         [TH_SYN|TH_ACK]                 = 1,
+       [TH_SYN|TH_PUSH]                = 1,
+       [TH_SYN|TH_ACK|TH_PUSH]         = 1,
         [TH_RST]                        = 1,
         [TH_RST|TH_ACK]                 = 1,
         [TH_RST|TH_ACK|TH_PUSH]         = 1,
@@ -783,7 +851,7 @@ static int tcp_error(struct sk_buff *skb,
                                 sizeof(_tcph), &_tcph);
         if (th == NULL) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                 "ip_ct_tcp: short packet ");
                 return -NF_ACCEPT;
         }
@@ -791,7 +859,7 @@ static int tcp_error(struct sk_buff *skb,
         /* Not whole TCP header or malformed packet */
         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                 "ip_ct_tcp: truncated/malformed packet ");
                 return -NF_ACCEPT;
         }
@@ -803,11 +871,12 @@ static int tcp_error(struct sk_buff *skb,
          */
         /* FIXME: Source route IP option packets --RR */
         if (hooknum == NF_IP_PRE_ROUTING
+           && skb->ip_summed != CHECKSUM_UNNECESSARY
             && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
                                  skb->ip_summed == CHECKSUM_HW ? skb->csum
                                  : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: bad TCP checksum ");
                 return -NF_ACCEPT;
         }
@@ -816,7 +885,7 @@ static int tcp_error(struct sk_buff *skb,
         tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
         if (!tcp_valid_flags[tcpflags]) {
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid TCP flag combination ");
                 return -NF_ACCEPT;
         }
@@ -840,7 +909,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                                 sizeof(_tcph), &_tcph);
         BUG_ON(th == NULL);
         
-       WRITE_LOCK(&tcp_lock);
+       write_lock_bh(&tcp_lock);
         old_state = conntrack->proto.tcp.state;
         dir = CTINFO2DIR(ctinfo);
         index = get_conntrack_index(th);
@@ -848,14 +917,17 @@ static int tcp_packet(struct ip_conntrack *conntrack,
  
         switch (new_state) {
         case TCP_CONNTRACK_IGNORE:
-               /* Either SYN in ORIGINAL
-                * or SYN/ACK in REPLY
-                * or ACK in REPLY direction (half-open connection). */
+               /* Ignored packets:
+                * 
+                * a) SYN in ORIGINAL
+                * b) SYN/ACK in REPLY
+                * c) ACK in reply direction after initial SYN in original.
+                */
                 if (index == TCP_SYNACK_SET
                     && conntrack->proto.tcp.last_index == TCP_SYN_SET
                     && conntrack->proto.tcp.last_dir != dir
-                   && after(ntohl(th->ack_seq),
-                            conntrack->proto.tcp.last_seq)) {
+                   && ntohl(th->ack_seq) ==
+                            conntrack->proto.tcp.last_end) {
                         /* This SYN/ACK acknowledges a SYN that we earlier 
                          * ignored as invalid. This means that the client and
                          * the server are both in sync, while the firewall is
@@ -863,10 +935,11 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                          * that the client cannot but retransmit its SYN and 
                          * thus initiate a clean new session.
                          */
-                       WRITE_UNLOCK(&tcp_lock);
+                       write_unlock_bh(&tcp_lock);
                         if (LOG_INVALID(IPPROTO_TCP))
-                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
-                                         "ip_ct_tcp: killing out of sync session ");
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                             NULL, "ip_ct_tcp: "
+                                             "killing out of sync session ");
                         if (del_timer(&conntrack->timeout))
                                 conntrack->timeout.function((unsigned long)
                                                             conntrack);
@@ -875,10 +948,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                 conntrack->proto.tcp.last_index = index;
                 conntrack->proto.tcp.last_dir = dir;
                 conntrack->proto.tcp.last_seq = ntohl(th->seq);
+               conntrack->proto.tcp.last_end = 
+                   segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
                 
-               WRITE_UNLOCK(&tcp_lock);
+               write_unlock_bh(&tcp_lock);
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid packet ignored ");
                 return NF_ACCEPT;
         case TCP_CONNTRACK_MAX:
@@ -886,53 +961,64 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
                        dir, get_conntrack_index(th),
                        old_state);
-               WRITE_UNLOCK(&tcp_lock);
+               write_unlock_bh(&tcp_lock);
                 if (LOG_INVALID(IPPROTO_TCP))
-                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
                                   "ip_ct_tcp: invalid state ");
                 return -NF_ACCEPT;
         case TCP_CONNTRACK_SYN_SENT:
-               if (old_state >= TCP_CONNTRACK_TIME_WAIT) {     
+               if (old_state < TCP_CONNTRACK_TIME_WAIT)
+                       break;
+               if ((conntrack->proto.tcp.seen[dir].flags &
+                        IP_CT_TCP_FLAG_CLOSE_INIT)
+                   || after(ntohl(th->seq),
+                            conntrack->proto.tcp.seen[dir].td_end)) {  
                         /* Attempt to reopen a closed connection.
                         * Delete this connection and look up again. */
-                       WRITE_UNLOCK(&tcp_lock);
+                       write_unlock_bh(&tcp_lock);
                         if (del_timer(&conntrack->timeout))
                                 conntrack->timeout.function((unsigned long)
                                                             conntrack);
                         return -NF_REPEAT;
+               } else {
+                       write_unlock_bh(&tcp_lock);
+                       if (LOG_INVALID(IPPROTO_TCP))
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                             NULL, "ip_ct_tcp: invalid SYN");
+                       return -NF_ACCEPT;
                 }
-               break;
         case TCP_CONNTRACK_CLOSE:
                 if (index == TCP_RST_SET
                     && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-                        && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
-                        || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-                        && conntrack->proto.tcp.last_index == TCP_ACK_SET))                && after(ntohl(th->ack_seq),
-                            conntrack->proto.tcp.last_seq)) {
-                       /* Ignore RST closing down invalid SYN or ACK
-                          we had let trough. */ 
-                       WRITE_UNLOCK(&tcp_lock);
-                       if (LOG_INVALID(IPPROTO_TCP))
-                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
-                                         "ip_ct_tcp: invalid RST (ignored) ");
-                       return NF_ACCEPT;
+                        && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+                       || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+                           && conntrack->proto.tcp.last_index == TCP_ACK_SET))
+                   && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+                       /* RST sent to invalid SYN or ACK we had let through
+                        * at a) and c) above:
+                        *
+                        * a) SYN was in window then
+                        * c) we hold a half-open connection.
+                        *
+                        * Delete our connection entry.
+                        * We skip window checking, because packet might ACK
+                        * segments we ignored. */
+                       goto in_window;
                 }
-               /* Just fall trough */
+               /* Just fall through */
         default:
                 /* Keep compilers happy. */
                 break;
         }
  
-       if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, 
+       if (!tcp_in_window(&conntrack->proto.tcp, dir, index, 
                            skb, iph, th)) {
-               WRITE_UNLOCK(&tcp_lock);
+               write_unlock_bh(&tcp_lock);
                 return -NF_ACCEPT;
         }
-       /* From now on we have got in-window packets */
-       
-       /* If FIN was trimmed off, we don't change state. */
+    in_window:
+       /* From now on we have got in-window packets */ 
         conntrack->proto.tcp.last_index = index;
-       new_state = tcp_conntracks[dir][index][old_state];
  
         DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
                "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
@@ -943,10 +1029,18 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                 old_state, new_state);
  
         conntrack->proto.tcp.state = new_state;
+       if (old_state != new_state 
+           && (new_state == TCP_CONNTRACK_FIN_WAIT
+               || new_state == TCP_CONNTRACK_CLOSE))
+               conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
         timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
                   && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
                   ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
-       WRITE_UNLOCK(&tcp_lock);
+       write_unlock_bh(&tcp_lock);
+
+       ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+       if (new_state != old_state)
+               ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
  
         if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
                 /* If only reply is a RST, we can consider ourselves not to
@@ -966,14 +1060,15 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                 /* Set ASSURED if we see see valid ack in ESTABLISHED 
                    after SYN_RECV or a valid answer for a picked up 
                    connection. */
-                       set_bit(IPS_ASSURED_BIT, &conntrack->status);
+               set_bit(IPS_ASSURED_BIT, &conntrack->status);
+               ip_conntrack_event_cache(IPCT_STATUS, skb);
         }
         ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
  
         return NF_ACCEPT;
  }
   
-  /* Called when a new connection for this protocol found. */
+/* Called when a new connection for this protocol found. */
  static int tcp_new(struct ip_conntrack *conntrack,
                    const struct sk_buff *skb)
  {
@@ -1060,22 +1155,6 @@ static int tcp_new(struct ip_conntrack *conntrack,
         return 1;
  }
    
-static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
-                              const struct sk_buff *skb)
-{
-       const struct iphdr *iph = skb->nh.iph;
-       struct tcphdr *th, _tcph;
-       unsigned int datalen;
-
-       th = skb_header_pointer(skb, iph->ihl * 4,
-                               sizeof(_tcph), &_tcph);
-       if (th == NULL)
-               return 0;
-       datalen = skb->len - iph->ihl*4 - th->doff*4;
-
-       return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen);
-}
-
  struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
  {
         .proto                  = IPPROTO_TCP,
@@ -1086,6 +1165,12 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
         .print_conntrack        = tcp_print_conntrack,
         .packet                 = tcp_packet,
         .new                    = tcp_new,
-       .exp_matches_pkt        = tcp_exp_matches_pkt,
         .error                  = tcp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+       .to_nfattr              = tcp_to_nfattr,
+       .from_nfattr            = nfattr_to_tcp,
+       .tuple_to_nfattr        = ip_ct_port_tuple_to_nfattr,
+       .nfattr_to_tuple        = ip_ct_port_nfattr_to_tuple,
+#endif
  };