vserver 1.9.3
[linux-2.6.git] / net / ipv4 / netfilter / ip_conntrack_proto_tcp.c
index 463cafa..8fab05a 100644 (file)
@@ -4,8 +4,22 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
+ *     - Real stateful connection tracking
+ *     - Modified state transitions table
+ *     - Window scaling support added
+ *     - SACK support added
+ *
+ * Willy Tarreau:
+ *     - State table bugfixes
+ *     - More robust state changes
+ *     - Tuning timer parameters
+ *
+ * version 2.2
  */
 
+#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
-#include <linux/string.h>
+#include <linux/spinlock.h>
 
 #include <net/tcp.h>
 
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/lockhelp.h>
 
 #if 0
 #define DEBUGP printk
+#define DEBUGP_VARS
 #else
 #define DEBUGP(format, args...)
 #endif
 /* Protects conntrack->proto.tcp */
 static DECLARE_RWLOCK(tcp_lock);
 
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
-   closely.  They're more complex. --RR */
+/* "Be conservative in what you do, 
+    be liberal in what you accept from others." 
+    If it's non-zero, we mark only out of window RST segments as INVALID. */
+int ip_ct_tcp_be_liberal = 0;
 
-/* Actually, I believe that neither ipmasq (where this code is stolen
-   from) nor ipfilter do it exactly right.  A new conntrack machine taking
-   into account packet loss (which creates uncertainty as to exactly
-   the conntrack of the connection) is required.  RSN.  --RR */
+/* When a connection is picked up from the middle, how many packets are
+   required to pass in each direction before we assume we are in sync - if
+   either side uses window scaling, we have lost the game.
+   If it is set to zero, we disable picking up already established 
+   connections. */
+int ip_ct_tcp_loose = 3;
+
+/* Max number of the retransmitted packets without receiving an (acceptable) 
+   ACK from the destination. If this number is reached, a shorter timer 
+   will be started. */
+int ip_ct_tcp_max_retrans = 3;
+
+  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
+     closely.  They're more complex. --RR */
 
 static const char *tcp_conntrack_names[] = {
        "NONE",
-       "ESTABLISHED",
        "SYN_SENT",
        "SYN_RECV",
+       "ESTABLISHED",
        "FIN_WAIT",
-       "TIME_WAIT",
-       "CLOSE",
        "CLOSE_WAIT",
        "LAST_ACK",
+       "TIME_WAIT",
+       "CLOSE",
        "LISTEN"
 };
-
-#define SECS *HZ
+  
+#define SECS * HZ
 #define MINS * 60 SECS
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
@@ -66,64 +95,214 @@ unsigned long ip_ct_tcp_timeout_last_ack =     30 SECS;
 unsigned long ip_ct_tcp_timeout_time_wait =     2 MINS;
 unsigned long ip_ct_tcp_timeout_close =        10 SECS;
 
+/* RFC1122 says the R2 limit should be at least 100 seconds.
+   Linux uses 15 packets as limit, which corresponds 
+   to ~13-30min depending on RTO. */
+unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
 static unsigned long * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
-    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
+    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
     &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
-    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
-    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
     &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT,       */
     &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
+    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
+    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
     NULL,                              /*      TCP_CONNTRACK_LISTEN */
  };
  
 #define sNO TCP_CONNTRACK_NONE
-#define sES TCP_CONNTRACK_ESTABLISHED
 #define sSS TCP_CONNTRACK_SYN_SENT
 #define sSR TCP_CONNTRACK_SYN_RECV
+#define sES TCP_CONNTRACK_ESTABLISHED
 #define sFW TCP_CONNTRACK_FIN_WAIT
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
 #define sCW TCP_CONNTRACK_CLOSE_WAIT
 #define sLA TCP_CONNTRACK_LAST_ACK
+#define sTW TCP_CONNTRACK_TIME_WAIT
+#define sCL TCP_CONNTRACK_CLOSE
 #define sLI TCP_CONNTRACK_LISTEN
 #define sIV TCP_CONNTRACK_MAX
+#define sIG TCP_CONNTRACK_IGNORE
 
-static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
+/* What TCP flags are set from RST/SYN/FIN/ACK. */
+/* The values are returned by get_conntrack_index() and used as the middle
+ * index of tcp_conntracks[dir][index][state], so the order here has to
+ * match the row order of that table (syn, synack, fin, ack, rst, none). */
+enum tcp_bit_set {
+       TCP_SYN_SET,            /* SYN set, ACK and RST clear */
+       TCP_SYNACK_SET,         /* SYN and ACK set, RST clear */
+       TCP_FIN_SET,            /* FIN set, RST and SYN clear */
+       TCP_ACK_SET,            /* ACK set, RST, SYN and FIN clear */
+       TCP_RST_SET,            /* RST set, whatever else is set */
+       TCP_NONE_SET,           /* none of RST/SYN/FIN/ACK set */
+};
+  
+/*
+ * The TCP state transition table needs a few words...
+ *
+ * We are the man in the middle. All the packets go through us
+ * but might get lost in transit to the destination.
+ * It is assumed that the destinations can't receive segments 
+ * we haven't seen.
+ *
+ * The checked segment is in window, but our windows are *not*
+ * equivalent with the ones of the sender/receiver. We always
+ * try to guess the state of the current sender.
+ *
+ * The meanings of the states are:
+ *
+ * NONE:       initial state
+ * SYN_SENT:   SYN-only packet seen 
+ * SYN_RECV:   SYN-ACK packet seen
+ * ESTABLISHED:        ACK packet seen
+ * FIN_WAIT:   FIN packet seen
+ * CLOSE_WAIT: ACK seen (after FIN) 
+ * LAST_ACK:   FIN seen (after FIN)
+ * TIME_WAIT:  last ACK seen
+ * CLOSE:      closed connection
+ *
+ * LISTEN state is not used.
+ *
+ * Packets marked as IGNORED (sIG):
+ *     if they may be either invalid or valid 
+ *     and the receiver may send back a connection 
+ *     closing RST or a SYN/ACK.
+ *
+ * Packets marked as INVALID (sIV):
+ *     if they are invalid
+ *     or we do not support the request (simultaneous open)
+ */
+static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        {
-/*     ORIGINAL */
-/*       sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI      */
-/*syn*/        {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
-/*fin*/        {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
-/*ack*/        {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
-/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
-/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+/* ORIGINAL */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*syn*/           { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/*
+ *     sNO -> sSS      Initialize a new connection
+ *     sSS -> sSS      Retransmitted SYN
+ *     sSR -> sIG      Late retransmitted SYN?
+ *     sES -> sIG      Error: SYNs in window outside the SYN_SENT state
+ *                     are errors. Receiver will reply with RST 
+ *                     and close the connection.
+ *                     Or we are not in sync and hold a dead connection.
+ *     sFW -> sIG
+ *     sCW -> sIG
+ *     sLA -> sIG
+ *     sTW -> sSS      Reopened connection (RFC 1122).
+ *     sCL -> sSS
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ * A SYN/ACK from the client is always invalid:
+ *     - either it tries to set up a simultaneous open, which is 
+ *       not supported;
+ *     - or the firewall has just been inserted between the two hosts
+ *       during the session set-up. The SYN will be retransmitted 
+ *       by the true client (or it'll time out).
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *     sNO -> sIV      Too late and no reason to do anything...
+ *     sSS -> sIV      Client might not send FIN in this state:
+ *                     we enforce waiting for a SYN/ACK reply first.
+ *     sSR -> sFW      Close started.
+ *     sES -> sFW      
+ *     sFW -> sLA      FIN seen in both directions, waiting for
+ *                     the last ACK. 
+ *                     Might be a retransmitted FIN as well...
+ *     sCW -> sLA
+ *     sLA -> sLA      Retransmitted FIN. Remain in the same state.
+ *     sTW -> sTW
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*ack*/           { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *     sNO -> sES      Assumed.
+ *     sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
+ *     sSR -> sES      Established state is reached.
+ *     sES -> sES      :-)
+ *     sFW -> sCW      Normal close request answered by ACK.
+ *     sCW -> sCW
+ *     sLA -> sTW      Last ACK detected.
+ *     sTW -> sTW      Retransmitted last ACK. Remain in the same state.
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
        },
        {
-/*     REPLY */
-/*       sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI      */
-/*syn*/        {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
-/*fin*/        {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
-/*ack*/        {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
-/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
-/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
-       }
+/* REPLY */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*syn*/           { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/*
+ *     sNO -> sIV      Never reached.
+ *     sSS -> sIV      Simultaneous open, not supported
+ *     sSR -> sIV      Simultaneous open, not supported.
+ *     sES -> sIV      Server may not initiate a connection.
+ *     sFW -> sIV
+ *     sCW -> sIV
+ *     sLA -> sIV
+ *     sTW -> sIV      Reopened connection, but server may not do it.
+ *     sCL -> sIV
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/*
+ *     sSS -> sSR      Standard open.
+ *     sSR -> sSR      Retransmitted SYN/ACK.
+ *     sES -> sIG      Late retransmitted SYN/ACK?
+ *     sFW -> sIG
+ *     sCW -> sIG
+ *     sLA -> sIG
+ *     sTW -> sIG
+ *     sCL -> sIG
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
+/*
+ *     sSS -> sIV      Server might not send FIN in this state.
+ *     sSR -> sFW      Close started.
+ *     sES -> sFW
+ *     sFW -> sLA      FIN seen in both directions.
+ *     sCW -> sLA
+ *     sLA -> sLA      Retransmitted FIN.
+ *     sTW -> sTW
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*ack*/           { sIV, sIV, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*
+ *     sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
+ *     sSR -> sIV      Simultaneous open.
+ *     sES -> sES      :-)
+ *     sFW -> sCW      Normal close request answered by ACK.
+ *     sCW -> sCW
+ *     sLA -> sTW      Last ACK detected.
+ *     sTW -> sTW      Retransmitted last ACK.
+ *     sCL -> sCL
+ */
+/*          sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
+       }
 };
 
 static int tcp_pkt_to_tuple(const struct sk_buff *skb,
-                            unsigned int dataoff,
-                            struct ip_conntrack_tuple *tuple)
+                           unsigned int dataoff,
+                           struct ip_conntrack_tuple *tuple)
 {
-       struct tcphdr hdr;
+       struct tcphdr _hdr, *hp;
 
        /* Actually only need first 8 bytes. */
-       if (skb_copy_bits(skb, dataoff, &hdr, 8) != 0)
+       hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+       if (hp == NULL)
                return 0;
 
-       tuple->src.u.tcp.port = hdr.source;
-       tuple->dst.u.tcp.port = hdr.dest;
+       tuple->src.u.tcp.port = hp->source;
+       tuple->dst.u.tcp.port = hp->dest;
 
        return 1;
 }
@@ -137,17 +316,17 @@ static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
 }
 
 /* Print out the per-protocol part of the tuple. */
-static unsigned int tcp_print_tuple(char *buffer,
-                                   const struct ip_conntrack_tuple *tuple)
+static int tcp_print_tuple(struct seq_file *s,
+                          const struct ip_conntrack_tuple *tuple)
 {
-       return sprintf(buffer, "sport=%hu dport=%hu ",
-                      ntohs(tuple->src.u.tcp.port),
-                      ntohs(tuple->dst.u.tcp.port));
+       return seq_printf(s, "sport=%hu dport=%hu ",
+                         ntohs(tuple->src.u.tcp.port),
+                         ntohs(tuple->dst.u.tcp.port));
 }
 
 /* Print out the private part of the conntrack. */
-static unsigned int tcp_print_conntrack(char *buffer,
-                                       const struct ip_conntrack *conntrack)
+static int tcp_print_conntrack(struct seq_file *s,
+                              const struct ip_conntrack *conntrack)
 {
        enum tcp_conntrack state;
 
@@ -155,16 +334,493 @@ static unsigned int tcp_print_conntrack(char *buffer,
        state = conntrack->proto.tcp.state;
        READ_UNLOCK(&tcp_lock);
 
-       return sprintf(buffer, "%s ", tcp_conntrack_names[state]);
+       return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 }
 
 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 {
-       if (tcph->rst) return 3;
-       else if (tcph->syn) return 0;
-       else if (tcph->fin) return 1;
-       else if (tcph->ack) return 2;
-       else return 4;
+       if (tcph->rst) return TCP_RST_SET;
+       else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+       else if (tcph->fin) return TCP_FIN_SET;
+       else if (tcph->ack) return TCP_ACK_SET;
+       else return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+   in IP Filter' by Guido van Rooij.
+   
+   http://www.nluug.nl/events/sane2000/papers.html
+   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+   
+   The boundaries and the conditions are slightly changed:
+   
+       td_maxend = max(sack + max(win,1)) seen in reply packets
+       td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+       td_end    = max(seq + len) seen in sent packets
+   
+   I.  Upper bound for valid data:     seq + len <= sender.td_maxend
+   II.         Lower bound for valid data:     seq >= sender.td_end - receiver.td_maxwin
+   III.        Upper bound for valid ack:      sack <= receiver.td_end
+   IV. Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
+       
+   where sack is the highest right edge of sack block found in the packet.
+       
+   The upper bound limit for a valid ack is not ignored -
+   we don't have to deal with fragments.
+*/
+
+/*
+ * Sequence number just past the end of a segment.
+ *
+ * len is the length of the whole packet as the callers pass it (skb->len),
+ * so the IP and TCP header lengths are subtracted to get the payload size.
+ * SYN and FIN each occupy one sequence number.
+ */
+static inline __u32 segment_seq_plus_len(__u32 seq,
+                                        size_t len,
+                                        struct iphdr *iph,
+                                        struct tcphdr *tcph)
+{
+       size_t hdrs = (iph->ihl + tcph->doff)*4;
+       __u32 end = seq + len - hdrs;
+
+       if (tcph->syn)
+               end++;
+       if (tcph->fin)
+               end++;
+
+       return end;
+}
+  
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST                 66000
+#define MAXACKWINDOW(sender)                                           \
+       ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
+                                             : MAXACKWINCONST)
+  
+/*
+ * Simplified tcp_parse_options routine from tcp_input.c
+ *
+ * Walks the TCP options of the segment and records in *state:
+ *   - IP_CT_TCP_FLAG_SACK_PERM if a SACK-permitted option is present;
+ *   - the window scale shift in td_scale (clamped to 14) together with
+ *     IP_CT_TCP_STATE_FLAG_WINDOW_SCALE if a window scale option is present.
+ *
+ * A segment without options returns early and leaves *state untouched;
+ * otherwise td_scale and flags are reset before parsing starts.
+ * Parsing stops at EOL, at a malformed option, or at an option that does
+ * not fit into the remaining option space.
+ */
+static void tcp_options(const struct sk_buff *skb,
+                       struct iphdr *iph,
+                       struct tcphdr *tcph,
+                       struct ip_ct_tcp_state *state)
+{
+       unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+       unsigned char *ptr;
+       int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+       if (!length)
+               return;
+
+       /* Options may be non-linear in the skb: buff is the bounce buffer. */
+       ptr = skb_header_pointer(skb,
+                                (iph->ihl * 4) + sizeof(struct tcphdr),
+                                length, buff);
+       BUG_ON(ptr == NULL);
+
+       state->td_scale =
+       state->flags = 0;
+
+       while (length > 0) {
+               int opcode=*ptr++;
+               int opsize;
+
+               switch (opcode) {
+               case TCPOPT_EOL:
+                       return;
+               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                       length--;
+                       continue;
+               default:
+                       /* The kind byte was the last byte: there is no
+                        * length byte left to read. */
+                       if (length < 2)
+                               return;
+                       opsize=*ptr++;
+                       if (opsize < 2) /* "silly options" */
+                               return;
+                       /* A "break" here would only leave the switch and
+                        * let the loop run on past the option space. */
+                       if (opsize > length)
+                               return; /* don't parse partial options */
+
+                       if (opcode == TCPOPT_SACK_PERM
+                           && opsize == TCPOLEN_SACK_PERM)
+                               state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
+                       else if (opcode == TCPOPT_WINDOW
+                                && opsize == TCPOLEN_WINDOW) {
+                               state->td_scale = *(u_int8_t *)ptr;
+
+                               if (state->td_scale > 14) {
+                                       /* See RFC1323 */
+                                       state->td_scale = 14;
+                               }
+                               state->flags |=
+                                       IP_CT_TCP_STATE_FLAG_WINDOW_SCALE;
+                       }
+                       ptr += opsize - 2;
+                       length -= opsize;
+               }
+       }
+}
+
+/*
+ * Raise *sack to the highest right edge found in the SACK blocks of the
+ * segment.  *sack is left unchanged when the segment carries no SACK
+ * option or when no right edge lies after the current value.
+ *
+ * NOTE(review): the options are read directly behind tcph, so this assumes
+ * tcph points at a header whose options are contiguous in memory - confirm
+ * against the callers.
+ */
+static void tcp_sack(struct tcphdr *tcph, __u32 *sack)
+{
+       __u32 tmp;
+       unsigned char *ptr;
+       int length = (tcph->doff*4) - sizeof(struct tcphdr);
+
+       /* Fast path for timestamp-only option.
+        * length is in bytes and so is TCPOLEN_TSTAMP_ALIGNED; scaling the
+        * constant by 4 made this test impossible to satisfy. */
+       if (length == TCPOLEN_TSTAMP_ALIGNED
+           && *(__u32 *)(tcph + 1) ==
+               __constant_ntohl((TCPOPT_NOP << 24)
+                                | (TCPOPT_NOP << 16)
+                                | (TCPOPT_TIMESTAMP << 8)
+                                | TCPOLEN_TIMESTAMP))
+               return;
+
+       ptr = (unsigned char *)(tcph + 1);
+       while (length > 0) {
+               int opcode=*ptr++;
+               int opsize, i;
+
+               switch (opcode) {
+               case TCPOPT_EOL:
+                       return;
+               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                       length--;
+                       continue;
+               default:
+                       /* The kind byte was the last byte: there is no
+                        * length byte left to read. */
+                       if (length < 2)
+                               return;
+                       opsize=*ptr++;
+                       if (opsize < 2) /* "silly options" */
+                               return;
+                       /* A "break" here would only leave the switch and
+                        * let the loop run on past the option space. */
+                       if (opsize > length)
+                               return; /* don't parse partial options */
+
+                       if (opcode == TCPOPT_SACK
+                           && opsize >= (TCPOLEN_SACK_BASE
+                                         + TCPOLEN_SACK_PERBLOCK)
+                           && !((opsize - TCPOLEN_SACK_BASE)
+                                % TCPOLEN_SACK_PERBLOCK)) {
+                               for (i = 0;
+                                    i < (opsize - TCPOLEN_SACK_BASE);
+                                    i += TCPOLEN_SACK_PERBLOCK) {
+                                       /* second 32-bit word of the block */
+                                       tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+
+                                       if (after(tmp, *sack))
+                                               *sack = tmp;
+                               }
+                               return;
+                       }
+                       ptr += opsize - 2;
+                       length -= opsize;
+               }
+       }
+}
+
+/*
+ * Check a segment against the tracked windows of both directions and, if
+ * it is acceptable, update the tracking data.
+ *
+ * state:  TCP tracking data of the connection
+ * dir:    direction of the segment; seen[dir] is the sender,
+ *         seen[!dir] the receiver
+ * index:  flag class of the segment (enum tcp_bit_set); rewritten from
+ *         TCP_FIN_SET to TCP_ACK_SET when the segment is trimmed at the
+ *         sender's td_maxend
+ * skb, iph, tcph: the packet and its IP and TCP headers
+ *
+ * Returns 1 if either side is still "loose" or the four boundary tests
+ * (I-IV in the comment further up) hold.  Otherwise the packet is logged
+ * and the result is ip_ct_tcp_be_liberal && !tcph->rst.
+ */
+static int tcp_in_window(struct ip_ct_tcp *state,
+                         enum ip_conntrack_dir dir,
+                         unsigned int *index,
+                         const struct sk_buff *skb,
+                         struct iphdr *iph,
+                         struct tcphdr *tcph)
+{
+       struct ip_ct_tcp_state *sender = &state->seen[dir];
+       struct ip_ct_tcp_state *receiver = &state->seen[!dir];
+       __u32 seq, ack, sack, end, win, swin;
+       int res;
+
+       /*
+        * Get the required data from the packet.
+        */
+       seq = ntohl(tcph->seq);
+       ack = sack = ntohl(tcph->ack_seq);
+       win = ntohs(tcph->window);
+       end = segment_seq_plus_len(seq, skb->len, iph, tcph);
+
+       /* sack stays equal to ack unless a SACK block reaches further. */
+       if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
+               tcp_sack(tcph, &sack);
+
+       DEBUGP("tcp_in_window: START\n");
+       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+              "seq=%u ack=%u sack=%u win=%u end=%u\n",
+               NIPQUAD(iph->saddr), ntohs(tcph->source),
+               NIPQUAD(iph->daddr), ntohs(tcph->dest),
+               seq, ack, sack, win, end);
+       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale,
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
+
+       if (sender->td_end == 0) {
+               /*
+                * Initialize sender data.
+                */
+               if (tcph->syn && tcph->ack) {
+                       /*
+                        * Outgoing SYN-ACK in reply to a SYN.
+                        */
+                       sender->td_end =
+                       sender->td_maxend = end;
+                       sender->td_maxwin = (win == 0 ? 1 : win);
+
+                       tcp_options(skb, iph, tcph, sender);
+                       /*
+                        * RFC 1323:
+                        * Both sides must send the Window Scale option
+                        * to enable window scaling in either direction.
+                        */
+                       if (!(sender->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE
+                             && receiver->flags & IP_CT_TCP_STATE_FLAG_WINDOW_SCALE))
+                               sender->td_scale =
+                               receiver->td_scale = 0;
+               } else {
+                       /*
+                        * We are in the middle of a connection,
+                        * its history is lost for us.
+                        * Let's try to use the data from the packet.
+                        */
+                       sender->td_end = end;
+                       sender->td_maxwin = (win == 0 ? 1 : win);
+                       sender->td_maxend = end + sender->td_maxwin;
+               }
+       } else if (state->state == TCP_CONNTRACK_SYN_SENT
+                  && dir == IP_CT_DIR_ORIGINAL
+                  && after(end, sender->td_end)) {
+               /*
+                * RFC 793: "if a TCP is reinitialized ... then it need
+                * not wait at all; it must only be sure to use sequence
+                * numbers larger than those recently used."
+                */
+               sender->td_end =
+               sender->td_maxend = end;
+               sender->td_maxwin = (win == 0 ? 1 : win);
+
+               tcp_options(skb, iph, tcph, sender);
+       }
+
+       if (!(tcph->ack)) {
+               /*
+                * If there is no ACK, just pretend it was set and OK.
+                */
+               ack = sack = receiver->td_end;
+       } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
+                   (TCP_FLAG_ACK|TCP_FLAG_RST))
+                  && (ack == 0)) {
+               /*
+                * Broken TCP stacks, that set ACK in RST packets as well
+                * with zero ack value.
+                */
+               ack = sack = receiver->td_end;
+       }
+
+       if (seq == end)
+               /*
+                * Packets contains no data: we assume it is valid
+                * and check the ack value only.
+                */
+               seq = end = sender->td_end;
+
+       DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+              "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
+               NIPQUAD(iph->saddr), ntohs(tcph->source),
+               NIPQUAD(iph->daddr), ntohs(tcph->dest),
+               seq, ack, sack, win, end,
+               after(end, sender->td_maxend) && before(seq, sender->td_maxend)
+               ? sender->td_maxend : end);
+       DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale,
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
+
+       /* Ignore data over the right edge of the receiver's window. */
+       if (after(end, sender->td_maxend) &&
+           before(seq, sender->td_maxend)) {
+               end = sender->td_maxend;
+               /* The trimmed part would have carried the FIN. */
+               if (*index == TCP_FIN_SET)
+                       *index = TCP_ACK_SET;
+       }
+       DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
+               before(end, sender->td_maxend + 1)
+                   || before(seq, sender->td_maxend + 1),
+               after(seq, sender->td_end - receiver->td_maxwin - 1)
+                   || after(end, sender->td_end - receiver->td_maxwin - 1),
+               before(sack, receiver->td_end + 1),
+               after(ack, receiver->td_end - MAXACKWINDOW(sender)));
+
+       if (sender->loose || receiver->loose ||
+           (before(end, sender->td_maxend + 1) &&
+            after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+            before(sack, receiver->td_end + 1) &&
+            after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
+               /*
+                * Take into account window scaling (RFC 1323).
+                */
+               if (!tcph->syn)
+                       win <<= sender->td_scale;
+
+               /*
+                * Update sender data.
+                */
+               swin = win + (sack - ack);
+               if (sender->td_maxwin < swin)
+                       sender->td_maxwin = swin;
+               if (after(end, sender->td_end))
+                       sender->td_end = end;
+               if (after(sack + win, receiver->td_maxend - 1)) {
+                       receiver->td_maxend = sack + win;
+                       if (win == 0)
+                               receiver->td_maxend++;
+               }
+
+               /*
+                * Check retransmissions.
+                */
+               if (*index == TCP_ACK_SET) {
+                       if (state->last_dir == dir
+                           && state->last_seq == seq
+                           && state->last_end == end)
+                               state->retrans++;
+                       else {
+                               state->last_dir = dir;
+                               state->last_seq = seq;
+                               state->last_end = end;
+                               state->retrans = 0;
+                       }
+               }
+               /*
+                * Close the window of disabled window tracking :-)
+                */
+               if (sender->loose)
+                       sender->loose--;
+
+               res = 1;
+       } else {
+               /*
+                * The nested conditionals repeat the four acceptance tests
+                * in order and name the first one that failed.  The third
+                * test has to look at sack, exactly like the check above,
+                * or a bad SACK edge would be reported as "BUG".
+                */
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                       "ip_ct_tcp: %s ",
+                       before(end, sender->td_maxend + 1) ?
+                       after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+                       before(sack, receiver->td_end + 1) ?
+                       after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+                       : "ACK is under the lower bound (possibly overly delayed ACK)"
+                       : "ACK is over the upper bound (ACKed data has never seen yet)"
+                       : "SEQ is under the lower bound (retransmitted already ACKed data)"
+                       : "SEQ is over the upper bound (over the window of the receiver)");
+
+               /* Liberal mode lets everything but RST segments through. */
+               res = ip_ct_tcp_be_liberal && !tcph->rst;
+       }
+
+       DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
+              "receiver end=%u maxend=%u maxwin=%u\n",
+               res, sender->td_end, sender->td_maxend, sender->td_maxwin,
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+
+       return res;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+/*
+ * Called after NAT successfully mangled the packet: recompute the end
+ * sequence number of the segment and fold it into the tracking data of
+ * direction dir (td_end only ever moves forward; last_end is always
+ * overwritten).  Always returns 1.
+ */
+int ip_conntrack_tcp_update(struct sk_buff *skb,
+                           struct ip_conntrack *conntrack,
+                           int dir)
+{
+       struct iphdr *iph = skb->nh.iph;
+       struct tcphdr *tcph = (void *)iph + iph->ihl*4;
+       struct ip_ct_tcp_state *seen = &conntrack->proto.tcp.seen[dir];
+       __u32 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
+#ifdef DEBUGP_VARS
+       /* Only referenced by DEBUGP() below. */
+       struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
+       struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
+#endif
+
+       WRITE_LOCK(&tcp_lock);
+       /*
+        * We have to worry for the ack in the reply packet only...
+        */
+       if (after(end, seen->td_end))
+               seen->td_end = end;
+       conntrack->proto.tcp.last_end = end;
+       WRITE_UNLOCK(&tcp_lock);
+
+       DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale,
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
+
+       return 1;
+}
+EXPORT_SYMBOL(ip_conntrack_tcp_update);
+#endif
+
+/* Bit masks for the TCP flag byte; tcp_error() applies them to byte 13 of
+ * the TCP header. */
+#define        TH_FIN  0x01
+#define        TH_SYN  0x02
+#define        TH_RST  0x04
+#define        TH_PUSH 0x08
+#define        TH_ACK  0x10
+#define        TH_URG  0x20
+#define        TH_ECE  0x40
+#define        TH_CWR  0x80
+
+/* table of valid flag combinations - ECE and CWR are always valid */
+/* Indexed by the flag byte with ECE and CWR masked off, hence the size of
+ * (all six remaining flags) + 1.  Entries not listed are zero, i.e. the
+ * combination is rejected. */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+       [TH_SYN]                        = 1,
+       [TH_SYN|TH_ACK]                 = 1,
+       [TH_RST]                        = 1,
+       [TH_RST|TH_ACK]                 = 1,
+       [TH_RST|TH_ACK|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK]                 = 1,
+       [TH_ACK]                        = 1,
+       [TH_ACK|TH_PUSH]                = 1,
+       [TH_ACK|TH_URG]                 = 1,
+       [TH_ACK|TH_URG|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK|TH_PUSH]         = 1,
+       [TH_FIN|TH_ACK|TH_URG]          = 1,
+       [TH_FIN|TH_ACK|TH_URG|TH_PUSH]  = 1,
+};
+
+/*
+ * Protect conntrack against broken packets. Code taken from ipt_unclean.c.
+ *
+ * Checks, in this order: the TCP header is present, the data offset is
+ * sane and covered by the packet, the checksum is good (PRE_ROUTING hook
+ * only) and the flag combination is a valid one.
+ *
+ * Returns NF_ACCEPT if the packet passes, -NF_ACCEPT (after optional
+ * logging) if it does not.  ctinfo is not used.
+ */
+static int tcp_error(struct sk_buff *skb,
+                    enum ip_conntrack_info *ctinfo,
+                    unsigned int hooknum)
+{
+       struct iphdr *iph = skb->nh.iph;
+       unsigned int iphlen = iph->ihl * 4;
+       unsigned int tcplen = skb->len - iphlen;
+       unsigned int thlen;
+       struct tcphdr _tcph, *th;
+       u_int8_t tcpflags;
+
+       /* Smaller than minimal TCP header? */
+       th = skb_header_pointer(skb, iphlen, sizeof(_tcph), &_tcph);
+       if (th == NULL) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                               "ip_ct_tcp: short packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Not whole TCP header or malformed packet */
+       thlen = th->doff * 4;
+       if (thlen < sizeof(struct tcphdr) || tcplen < thlen) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                               "ip_ct_tcp: truncated/malformed packet ");
+               return -NF_ACCEPT;
+       }
+
+       /* Checksum invalid? Ignore.
+        * We skip checking packets on the outgoing path
+        * because the semantic of CHECKSUM_HW is different there
+        * and moreover root might send raw packets.
+        */
+       /* FIXME: Source route IP option packets --RR */
+       if (hooknum == NF_IP_PRE_ROUTING) {
+               unsigned int sum;
+
+               /* Use the hardware sum if there is one, else compute it. */
+               if (skb->ip_summed == CHECKSUM_HW)
+                       sum = skb->csum;
+               else
+                       sum = skb_checksum(skb, iphlen, tcplen, 0);
+
+               if (csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen,
+                                     IPPROTO_TCP, sum)) {
+                       if (LOG_INVALID(IPPROTO_TCP))
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                         "ip_ct_tcp: bad TCP checksum ");
+                       return -NF_ACCEPT;
+               }
+       }
+
+       /* Check TCP flags: byte 13 of the header, ECE and CWR ignored. */
+       tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+       if (!tcp_valid_flags[tcpflags]) {
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+                                 "ip_ct_tcp: invalid TCP flag combination ");
+               return -NF_ACCEPT;
+       }
+
+       return NF_ACCEPT;
+}
 
 /* Returns verdict for packet, or -1 for invalid. */
@@ -172,103 +828,260 @@ static int tcp_packet(struct ip_conntrack *conntrack,
                      const struct sk_buff *skb,
                      enum ip_conntrack_info ctinfo)
 {
-       enum tcp_conntrack newconntrack, oldtcpstate;
-       struct tcphdr tcph;
-
-       if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
-               return -1;
-       if (skb->len < skb->nh.iph->ihl * 4 + tcph.doff * 4)
-               return -1;
-
-       /* If only reply is a RST, we can consider ourselves not to
-          have an established connection: this is a fairly common
-          problem case, so we can delete the conntrack
-          immediately.  --RR */
-       if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) {
-               if (del_timer(&conntrack->timeout))
-                       conntrack->timeout.function((unsigned long)conntrack);
+       enum tcp_conntrack new_state, old_state;
+       enum ip_conntrack_dir dir;
+       struct iphdr *iph = skb->nh.iph;
+       struct tcphdr *th, _tcph;
+       unsigned long timeout;
+       unsigned int index;
+       
+       th = skb_header_pointer(skb, iph->ihl * 4,
+                               sizeof(_tcph), &_tcph);
+       BUG_ON(th == NULL);
+       
+       WRITE_LOCK(&tcp_lock);
+       old_state = conntrack->proto.tcp.state;
+       dir = CTINFO2DIR(ctinfo);
+       index = get_conntrack_index(th);
+       new_state = tcp_conntracks[dir][index][old_state];
+
+       switch (new_state) {
+       case TCP_CONNTRACK_IGNORE:
+               /* Either SYN in ORIGINAL, or SYN/ACK in REPLY direction. */
+               if (index == TCP_SYNACK_SET
+                   && conntrack->proto.tcp.last_index == TCP_SYN_SET
+                   && conntrack->proto.tcp.last_dir != dir
+                   && after(ntohl(th->ack_seq),
+                            conntrack->proto.tcp.last_seq)) {
+                       /* This SYN/ACK acknowledges a SYN that we earlier 
+                        * ignored as invalid. This means that the client and
+                        * the server are both in sync, while the firewall is
+                        * not. We kill this session and block the SYN/ACK so
+                        * that the client cannot but retransmit its SYN and 
+                        * thus initiate a clean new session.
+                        */
+                       WRITE_UNLOCK(&tcp_lock);
+                       if (LOG_INVALID(IPPROTO_TCP))
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                                         "ip_ct_tcp: killing out of sync session ");
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return -NF_DROP;
+               }
+               conntrack->proto.tcp.last_index = index;
+               conntrack->proto.tcp.last_dir = dir;
+               conntrack->proto.tcp.last_seq = ntohl(th->seq);
+               
+               WRITE_UNLOCK(&tcp_lock);
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                                 "ip_ct_tcp: invalid SYN (ignored) ");
                return NF_ACCEPT;
+       case TCP_CONNTRACK_MAX:
+               /* Invalid packet */
+               DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+                      dir, get_conntrack_index(th),
+                      old_state);
+               WRITE_UNLOCK(&tcp_lock);
+               if (LOG_INVALID(IPPROTO_TCP))
+                       nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                                 "ip_ct_tcp: invalid state ");
+               return -NF_ACCEPT;
+       case TCP_CONNTRACK_SYN_SENT:
+               if (old_state >= TCP_CONNTRACK_TIME_WAIT) {     
+                       /* Attempt to reopen a closed connection.
+                       * Delete this connection and look up again. */
+                       WRITE_UNLOCK(&tcp_lock);
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return -NF_REPEAT;
+               }
+               break;
+       case TCP_CONNTRACK_CLOSE:
+               if (index == TCP_RST_SET
+                   && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+                   && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET
+                   && after(ntohl(th->ack_seq),
+                            conntrack->proto.tcp.last_seq)) {
+                       /* Ignore RST closing down invalid SYN 
+                          we had let through. */
+                       WRITE_UNLOCK(&tcp_lock);
+                       if (LOG_INVALID(IPPROTO_TCP))
+                               nf_log_packet(PF_INET, 0, skb, NULL, NULL, 
+                                         "ip_ct_tcp: invalid RST (ignored) ");
+                       return NF_ACCEPT;
+               }
+               /* Just fall through */
+       default:
+               /* Keep compilers happy. */
+               break;
        }
 
-       WRITE_LOCK(&tcp_lock);
-       oldtcpstate = conntrack->proto.tcp.state;
-       newconntrack
-               = tcp_conntracks
-               [CTINFO2DIR(ctinfo)]
-               [get_conntrack_index(&tcph)][oldtcpstate];
-
-       /* Invalid */
-       if (newconntrack == TCP_CONNTRACK_MAX) {
-               DEBUGP("ip_conntrack_tcp: Invalid dir=%i index=%u conntrack=%u\n",
-                      CTINFO2DIR(ctinfo), get_conntrack_index(&tcph),
-                      conntrack->proto.tcp.state);
+       if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, 
+                          skb, iph, th)) {
                WRITE_UNLOCK(&tcp_lock);
-               return -1;
+               return -NF_ACCEPT;
        }
+       /* From now on we have got in-window packets */
+       
+       /* If FIN was trimmed off, we don't change state. */
+       conntrack->proto.tcp.last_index = index;
+       new_state = tcp_conntracks[dir][index][old_state];
 
-       conntrack->proto.tcp.state = newconntrack;
-
-       /* Poor man's window tracking: record SYN/ACK for handshake check */
-       if (oldtcpstate == TCP_CONNTRACK_SYN_SENT
-           && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
-           && tcph.syn && tcph.ack) {
-               conntrack->proto.tcp.handshake_ack
-                       = htonl(ntohl(tcph.seq) + 1);
-               goto out;
-       }
+       DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+              "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+               NIPQUAD(iph->saddr), ntohs(th->source),
+               NIPQUAD(iph->daddr), ntohs(th->dest),
+               (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+               (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+               old_state, new_state);
 
-       /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */
-       if (oldtcpstate == TCP_CONNTRACK_SYN_RECV
-           && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL
-           && tcph.ack && !tcph.syn
-           && tcph.ack_seq == conntrack->proto.tcp.handshake_ack)
-               set_bit(IPS_ASSURED_BIT, &conntrack->status);
+       conntrack->proto.tcp.state = new_state;
+       timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
+                 && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
+                 ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+       WRITE_UNLOCK(&tcp_lock);
 
-out:   WRITE_UNLOCK(&tcp_lock);
-       ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]);
+       if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+               /* If only reply is a RST, we can consider ourselves not to
+                  have an established connection: this is a fairly common
+                  problem case, so we can delete the conntrack
+                  immediately.  --RR */
+               if (th->rst) {
+                       if (del_timer(&conntrack->timeout))
+                               conntrack->timeout.function((unsigned long)
+                                                           conntrack);
+                       return NF_ACCEPT;
+               }
+       } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+                  && (old_state == TCP_CONNTRACK_SYN_RECV
+                      || old_state == TCP_CONNTRACK_ESTABLISHED)
+                  && new_state == TCP_CONNTRACK_ESTABLISHED) {
+               /* Set ASSURED if we see a valid ack in ESTABLISHED
+                  after SYN_RECV or a valid answer for a picked up 
+                  connection. */
+                       set_bit(IPS_ASSURED_BIT, &conntrack->status);
+       }
+       ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
 
        return NF_ACCEPT;
 }
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
+  /* Called when a new connection for this protocol is found. */
+static int tcp_new(struct ip_conntrack *conntrack,
+                  const struct sk_buff *skb)
 {
-       enum tcp_conntrack newconntrack;
-       struct tcphdr tcph;
-
-       if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
-               return -1;
+       enum tcp_conntrack new_state;
+       struct iphdr *iph = skb->nh.iph;
+       struct tcphdr *th, _tcph;
+#ifdef DEBUGP_VARS
+       struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
+       struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
+#endif
 
+       th = skb_header_pointer(skb, iph->ihl * 4,
+                               sizeof(_tcph), &_tcph);
+       BUG_ON(th == NULL);
+       
        /* Don't need lock here: this conntrack not in circulation yet */
-       newconntrack
-               = tcp_conntracks[0][get_conntrack_index(&tcph)]
+       new_state
+               = tcp_conntracks[0][get_conntrack_index(th)]
                [TCP_CONNTRACK_NONE];
 
        /* Invalid: delete conntrack */
-       if (newconntrack == TCP_CONNTRACK_MAX) {
-               DEBUGP("ip_conntrack_tcp: invalid new deleting.\n");
+       if (new_state >= TCP_CONNTRACK_MAX) {
+               DEBUGP("ip_ct_tcp: invalid new deleting.\n");
+               return 0;
+       }
+
+       if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               /* SYN packet */
+               conntrack->proto.tcp.seen[0].td_end =
+                       segment_seq_plus_len(ntohl(th->seq), skb->len,
+                                            iph, th);
+               conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+                       conntrack->proto.tcp.seen[0].td_maxwin = 1;
+               conntrack->proto.tcp.seen[0].td_maxend =
+                       conntrack->proto.tcp.seen[0].td_end;
+
+               tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
+               conntrack->proto.tcp.seen[1].flags = 0;
+               conntrack->proto.tcp.seen[0].loose = 
+               conntrack->proto.tcp.seen[1].loose = 0;
+       } else if (ip_ct_tcp_loose == 0) {
+               /* Don't try to pick up connections. */
                return 0;
+       } else {
+               /*
+                * We are in the middle of a connection,
+                * its history is lost for us.
+                * Let's try to use the data from the packet.
+                */
+               conntrack->proto.tcp.seen[0].td_end =
+                       segment_seq_plus_len(ntohl(th->seq), skb->len,
+                                            iph, th);
+               conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+               if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+                       conntrack->proto.tcp.seen[0].td_maxwin = 1;
+               conntrack->proto.tcp.seen[0].td_maxend =
+                       conntrack->proto.tcp.seen[0].td_end + 
+                       conntrack->proto.tcp.seen[0].td_maxwin;
+               conntrack->proto.tcp.seen[0].td_scale = 0;
+
+               /* We assume SACK. Should we assume window scaling too? */
+               conntrack->proto.tcp.seen[0].flags =
+               conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
+               conntrack->proto.tcp.seen[0].loose = 
+               conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
        }
+    
+       conntrack->proto.tcp.seen[1].td_end = 0;
+       conntrack->proto.tcp.seen[1].td_maxend = 0;
+       conntrack->proto.tcp.seen[1].td_maxwin = 1;
+       conntrack->proto.tcp.seen[1].td_scale = 0;      
 
-       conntrack->proto.tcp.state = newconntrack;
+       /* tcp_packet will set them */
+       conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
+       conntrack->proto.tcp.last_index = TCP_NONE_SET;
+        
+       DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
+              "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+               sender->td_end, sender->td_maxend, sender->td_maxwin,
+               sender->td_scale, 
+               receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+               receiver->td_scale);
        return 1;
 }
-
+  
 static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
                               const struct sk_buff *skb)
 {
        const struct iphdr *iph = skb->nh.iph;
-       struct tcphdr tcph;
+       struct tcphdr *th, _tcph;
        unsigned int datalen;
 
-       if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0)
+       th = skb_header_pointer(skb, iph->ihl * 4,
+                               sizeof(_tcph), &_tcph);
+       if (th == NULL)
                return 0;
-       datalen = skb->len - iph->ihl*4 - tcph.doff*4;
+       datalen = skb->len - iph->ihl*4 - th->doff*4;
 
-       return between(exp->seq, ntohl(tcph.seq), ntohl(tcph.seq) + datalen);
+       return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen);
 }
 
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp
-= { { NULL, NULL }, IPPROTO_TCP, "tcp",
-    tcp_pkt_to_tuple, tcp_invert_tuple, tcp_print_tuple, tcp_print_conntrack,
-    tcp_packet, tcp_new, NULL, tcp_exp_matches_pkt, NULL };
+struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
+{
+       .proto                  = IPPROTO_TCP,
+       .name                   = "tcp",
+       .pkt_to_tuple           = tcp_pkt_to_tuple,
+       .invert_tuple           = tcp_invert_tuple,
+       .print_tuple            = tcp_print_tuple,
+       .print_conntrack        = tcp_print_conntrack,
+       .packet                 = tcp_packet,
+       .new                    = tcp_new,
+       .exp_matches_pkt        = tcp_exp_matches_pkt,
+       .error                  = tcp_error,
+};