Fixing the accounting issue that causes certain connections to be misaccounted, and...
[linux-2.6.git] / linux-2.6-522-iptables-connection-tagging.patch
index 7ccbd55..881c435 100644 (file)
@@ -110,7 +110,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/n
  
 diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c
 --- linux-2.6.22-521/net/netfilter/xt_MARK.c   2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-07-31 14:49:48.000000000 -0400
++++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-09-14 16:50:22.000000000 -0400
 @@ -5,13 +5,18 @@
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
@@ -130,10 +130,12 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  #include <linux/netfilter/x_tables.h>
  #include <linux/netfilter/xt_MARK.h>
  
-@@ -21,6 +26,48 @@
+@@ -21,6 +26,50 @@
  MODULE_ALIAS("ipt_MARK");
  MODULE_ALIAS("ip6t_MARK");
  
++#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1)) /* true when peercred uid x is neither 0 nor (unsigned)-1 */
++
 +static inline u_int16_t
 +get_dst_port(struct nf_conntrack_tuple *tuple)
 +{
@@ -179,7 +181,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static unsigned int
  target_v0(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -35,6 +82,10 @@
+@@ -35,6 +84,10 @@
        return XT_CONTINUE;
  }
  
@@ -190,7 +192,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static unsigned int
  target_v1(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -44,7 +95,20 @@
+@@ -44,7 +97,20 @@
          const void *targinfo)
  {
        const struct xt_mark_target_info_v1 *markinfo = targinfo;
@@ -212,93 +214,98 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  
        switch (markinfo->mode) {
        case XT_MARK_SET:
-@@ -58,13 +122,102 @@
+@@ -58,13 +124,107 @@
        case XT_MARK_OR:
                mark = (*pskb)->mark | markinfo->mark;
                break;
 +
-+      case XT_MARK_COPYXID: 
-+            dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
-+
-+            ct = nf_ct_get((*pskb), &ctinfo);
-+            if (!ct) 
-+                    break;
-+
-+            dir = CTINFO2DIR(ctinfo);
-+            src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
-+            dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+            src_port = get_src_port(&ct->tuplehash[dir].tuple);
-+            proto = ct->tuplehash[dir].tuple.dst.protonum;
-+
-+            ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+            port = get_dst_port(&ct->tuplehash[dir].tuple);
-+
-+            if (proto == 1 || proto == 17) {
-+                    if ((*pskb)->mark>0) /* The packet is marked, it's going out */
-+                    {
-+                            ct->xid[0]=(*pskb)->mark;
-+                    }
-+
-+                    if (ct->xid[0] > 0) {
-+                            mark = ct->xid[0];
-+                    }
-+
-+            }
-+            else if (proto == 6) { 
-+                    if ((*pskb)->sk) {
-+                            /* It's a listening socket */
-+                            connection_sk = (*pskb)->sk;
-+                            sock_hold(connection_sk);
-+                    }
-+                    else   /* It might be a connected socket */
-+                            connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
-+
-+
-+                    if (connection_sk /* Well, some kind of TCP socket */) {
-+                            if (connection_sk->sk_peercred.uid == 0 || connection_sk->sk_peercred.uid == (__u32) -1) {
-+                                    /* Normal case - the peercred on the socket is not set */ 
-+                                    connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
-+                            }
-+                            else    /* Exceptional case - the peercred was set using SET_PEERCRED. Somebody wants us
-+                                       to mark packets with some arbitrary value.*/
-+                                    mark=connection_sk->sk_peercred.uid;
-+
-+                            /* Has this connection already been tagged? */
-+                            if (ct->xid[dir] < 1) {
-+                                    /* No - let's tag it */ 
-+                                    ct->xid[dir]=connection_sk->sk_nid;
-+                            }
-+
-+                            if (mark==-1 && (ct->xid[dir] != 0))
-+                                    mark = ct->xid[dir];
-+
-+
-+                            if (connection_sk->sk_state == TCP_TIME_WAIT) {
-+                                    inet_twsk_put(inet_twsk(connection_sk));
-+                                    break;
-+                            }
-+                            else
-+                                    sock_put(connection_sk);
-+                    }
-+
-+                    /* Covers CoDemux case */
-+                    if (mark < 1 && (ct->xid[dir]>0)) {
-+                            mark = ct->xid[dir];
++                      case XT_MARK_COPYXID: 
++                                      dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
++
++                                      ct = nf_ct_get((*pskb), &ctinfo);
++                                      if (!ct) 
++                                                      break;
++
++                                      dir = CTINFO2DIR(ctinfo);
++                                      src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
++                                      dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                      src_port = get_src_port(&ct->tuplehash[dir].tuple);
++                                      proto = ct->tuplehash[dir].tuple.dst.protonum;
++
++                                      ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                      port = get_dst_port(&ct->tuplehash[dir].tuple);
++
++                                      if (proto == 1 || proto == 17) {
++                                                      if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++                                                      {
++                                                                      ct->xid[0]=(*pskb)->mark;
        }
  
-+                    if (mark < 1 && (ct->xid[!dir]>0)) {
-+                              mark = ct->xid[!dir];
-+                    }
-+
-+                    /* All else failed. Is this a connection over raw sockets? That explains
-+                     * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
-+                    if (ct->xid[dir]<1) {
-+                      if ((*pskb)->skb_tag) {
-+                              ct->xid[dir]=(*pskb)->skb_tag;
-+                      }
-+                    }
-+            }
-+            break;
++                                                      if (ct->xid[0] > 0) {
++                                                                      mark = ct->xid[0];
++                                                      }
++
++                                      }
++                                      else if (proto == 6) /* TCP */{
++                                                      int sockettype=0; /* Established socket */
++                                                      /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in
++                                                       * that order. The order is important for Codemux connections to be handled properly */
++
++                                                      connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++
++                                                      if (!connection_sk) {
++                                                              connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif);
++                                                              sockettype=1; /* Listening socket */
++                                                      }
++
++                                                      if (connection_sk) {
++                                                                      /* The peercred is not set. We set it if the other side has an xid. */
++                                                                      if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
++                                                                                                      && ct->xid[!dir]>0 && (sockettype==0)) {
++                                                                                      connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
++                                                                      }
++
++                                                                      /* The peercred is set, and is not equal to the XID of 'the other side' */
++                                                                      else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) {
++                                                                                      mark = connection_sk->sk_peercred.uid;
++                                                                      }
++
++                                                                      /* Has this connection already been tagged? */
++                                                                      if (ct->xid[dir] < 1) {
++                                                                                      /* No - let's tag it */ 
++                                                                                      ct->xid[dir]=connection_sk->sk_nid;
++
++                                                                      }
++
++                                                                      if (mark==-1 && (ct->xid[dir]!= 0))
++                                                                                      mark = ct->xid[dir];
++
++                                                                      if (connection_sk->sk_state == TCP_TIME_WAIT) {
++                                                                                      inet_twsk_put(inet_twsk(connection_sk));
++                                                                                      break;
++                                                                      }
++                                                                      else
++                                                                                      sock_put(connection_sk);
++                                                      }
++
++                                                      /* All else failed. Is this a connection over raw sockets? That would explain
++                                                       * why neither the established nor the listener lookup yielded a "real" connection. */
++                                                      if (ct->xid[dir]<1) {
++                                                                      if ((*pskb)->skb_tag) {
++                                                                                      ct->xid[dir]=(*pskb)->skb_tag;
++                                                                      }
++                                                      }
++
++                                                      /* Covers CoDemux case */
++                                                      if (mark < 1 && (ct->xid[dir]>0)) {
++                                                                      mark = ct->xid[dir];
++                                                      }
++
++                                                      if (mark < 1 && (ct->xid[!dir]>0)) {
++                                                                      mark = ct->xid[!dir];
++                                                      }
++                                                      break;
++                                      }
 +      }
 +      if (mark != -1) {
        (*pskb)->mark = mark;
@@ -316,7 +323,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static int
  checkentry_v0(const char *tablename,
              const void *entry,
-@@ -92,7 +245,8 @@
+@@ -92,7 +252,8 @@
  
        if (markinfo->mode != XT_MARK_SET
            && markinfo->mode != XT_MARK_AND