Fix for the recently reported accounting problem: Ticket #396 and Ticket #202.
author: Sapan Bhatia <sapanb@cs.princeton.edu>
Sun, 14 Sep 2008 20:47:39 +0000 (20:47 +0000)
committer: Sapan Bhatia <sapanb@cs.princeton.edu>
Sun, 14 Sep 2008 20:47:39 +0000 (20:47 +0000)
linux-2.6-522-iptables-connection-tagging.patch

index 1513379..881c435 100644 (file)
@@ -110,8 +110,8 @@ diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/n
  
 diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c
 --- linux-2.6.22-521/net/netfilter/xt_MARK.c   2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-08-04 16:44:16.000000000 -0400
-@@ -5,13 +5,19 @@
++++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-09-14 16:50:22.000000000 -0400
+@@ -5,13 +5,18 @@
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
@@ -122,7 +122,6 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
 +#include <linux/version.h>
  #include <linux/skbuff.h>
  #include <linux/ip.h>
-+#include <net/udp.h>
  #include <net/checksum.h>
 +#include <net/route.h>
 +#include <net/inet_hashtables.h>
@@ -131,10 +130,12 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  #include <linux/netfilter/x_tables.h>
  #include <linux/netfilter/xt_MARK.h>
  
-@@ -21,6 +27,48 @@
+@@ -21,6 +26,50 @@
  MODULE_ALIAS("ipt_MARK");
  MODULE_ALIAS("ip6t_MARK");
  
++#define PEERCRED_SET(x) ((x!=0) && (x!=(unsigned int)-1)) 
++
 +static inline u_int16_t
 +get_dst_port(struct nf_conntrack_tuple *tuple)
 +{
@@ -180,75 +181,18 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static unsigned int
  target_v0(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -35,6 +83,67 @@
+@@ -35,6 +84,10 @@
        return XT_CONTINUE;
  }
  
 +extern DEFINE_PER_CPU(int, sknid_elevator);
-+static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
-+                      __be32 daddr, __be16 dport,
-+                      int dif, struct hlist_head udptable[])
-+{
-+    struct sock *sk, *result = NULL;
-+    struct hlist_node *node;
-+    unsigned short hnum = ntohs(dport);
-+    int badness = -1;
-+
-+    read_lock(&udp_hash_lock);
-+
-+    sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
-+        struct inet_sock *inet = inet_sk(sk);
-+
-+        if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
-+            int score = (sk->sk_family == PF_INET ? 1 : 0);
-+
-+            if (inet->rcv_saddr) {
-+                if (inet->rcv_saddr != daddr)
-+                    continue;
-+                score+=2;
-+            } else {
-+                /* block non nx_info ips */
-+                if (!v4_addr_in_nx_info(sk->sk_nx_info,
-+                    daddr, NXA_MASK_BIND))
-+                    continue;
-+            }
-+            if (inet->daddr) {
-+                if (inet->daddr != saddr)
-+                    continue;
-+                score+=2;
-+            }
-+            if (inet->dport) {
-+                if (inet->dport != sport)
-+                    continue;
-+                score+=2;
-+            }
-+            if (sk->sk_bound_dev_if) {
-+                if (sk->sk_bound_dev_if != dif)
-+                    continue;
-+                score+=2;
-+            }
-+            if (score == 9) {
-+                result = sk;
-+                break;
-+            } else if (score > badness) {
-+                result = sk;
-+                badness = score;
-+            }
-+        }
-+    }
-+
-+    if (result)
-+        sock_hold(result);
-+    read_unlock(&udp_hash_lock);
-+    return result;
-+}
 +
 +#define related(ct) (ct==(IP_CT_IS_REPLY + IP_CT_RELATED))
 +
  static unsigned int
  target_v1(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -44,7 +153,20 @@
+@@ -44,7 +97,20 @@
          const void *targinfo)
  {
        const struct xt_mark_target_info_v1 *markinfo = targinfo;
@@ -270,112 +214,98 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  
        switch (markinfo->mode) {
        case XT_MARK_SET:
-@@ -58,13 +180,121 @@
+@@ -58,13 +124,107 @@
        case XT_MARK_OR:
                mark = (*pskb)->mark | markinfo->mark;
                break;
 +
-+      case XT_MARK_COPYXID: 
-+            dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
-+
-+            ct = nf_ct_get((*pskb), &ctinfo);
-+            if (!ct) 
-+                    break;
-+
-+            dir = CTINFO2DIR(ctinfo);
-+            src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
-+            dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+            src_port = get_src_port(&ct->tuplehash[dir].tuple);
-+            proto = ct->tuplehash[dir].tuple.dst.protonum;
-+
-+            ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+            port = get_dst_port(&ct->tuplehash[dir].tuple);
-+
-+            if (proto == 1) {
-+                                if ((*pskb)->mark>0) /* The packet is marked, it's going out */
-+                                {
-+                                                ct->xid[0]=(*pskb)->mark;
-+                                }
-+
-+                                if (ct->xid[0] > 0) {
-+                                                mark = ct->xid[0];
-+                                }
-+            }
-+                else if (proto == 17) {
-+                  struct sock *sk;
-+                  if (!(*pskb)->mark) {
-+                          sk = __udp4_lib_lookup(src_ip, src_port, ip, port,
-+                                          dif, udp_hash);
-+
-+                          if (sk && hooknum==NF_IP_LOCAL_IN) {
-+                                  mark=sk->sk_nid;
-+                          }
-+
-+                                                if (sk) {
-+                                                                sock_put(sk);
-+                                                }
-+                  }
-+                  else
-+                  if ((*pskb)->mark>0) /* The packet is marked, it's going out */
-+                  {
-+                          ct->xid[0]=(*pskb)->mark;
-+                  }
-+          }
-+            else if (proto == 6) { 
-+                    if ((*pskb)->sk) {
-+                            /* It's a listening socket */
-+                            connection_sk = (*pskb)->sk;
-+                            sock_hold(connection_sk);
-+                    }
-+                    else   /* It might be a connected socket */
-+                            connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
-+
-+
-+                    if (connection_sk /* Well, some kind of TCP socket */) {
-+                            if (connection_sk->sk_peercred.uid == 0 || connection_sk->sk_peercred.uid == (__u32) -1) {
-+                                    /* Normal case - the peercred on the socket is not set */ 
-+                                    connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
-+                            }
-+                            else    /* Exceptional case - the peercred was set using SET_PEERCRED. Somebody wants us
-+                                       to mark packets with some arbitrary value.*/
-+                                    mark=connection_sk->sk_peercred.uid;
-+
-+                            /* Has this connection already been tagged? */
-+                            if (ct->xid[dir] < 1) {
-+                                    /* No - let's tag it */ 
-+                                    ct->xid[dir]=connection_sk->sk_nid;
-+                            }
-+
-+                            if (mark==-1 && (connection_sk->sk_nid != 0))
-+                                    mark = ct->xid[dir];
-+
-+
-+                            if (connection_sk->sk_state == TCP_TIME_WAIT) {
-+                                    inet_twsk_put(inet_twsk(connection_sk));
-+                                    break;
-+                            }
-+                            else
-+                                    sock_put(connection_sk);
-+                    }
-+
-+                    /* Covers CoDemux case */
-+                    if (mark < 1 && (ct->xid[dir]>0)) {
-+                            mark = ct->xid[dir];
++                      case XT_MARK_COPYXID: 
++                                      dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
++
++                                      ct = nf_ct_get((*pskb), &ctinfo);
++                                      if (!ct) 
++                                                      break;
++
++                                      dir = CTINFO2DIR(ctinfo);
++                                      src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
++                                      dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                      src_port = get_src_port(&ct->tuplehash[dir].tuple);
++                                      proto = ct->tuplehash[dir].tuple.dst.protonum;
++
++                                      ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                      port = get_dst_port(&ct->tuplehash[dir].tuple);
++
++                                      if (proto == 1 || proto == 17) {
++                                                      if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++                                                      {
++                                                                      ct->xid[0]=(*pskb)->mark;
        }
  
-+                    if (mark < 1 && (ct->xid[!dir]>0)) {
-+                              mark = ct->xid[!dir];
-+                    }
-+
-+                    /* All else failed. Is this a connection over raw sockets? That explains
-+                     * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
-+                    if (ct->xid[dir]<1) {
-+                      if ((*pskb)->skb_tag) {
-+                              ct->xid[dir]=(*pskb)->skb_tag;
-+                      }
-+                    }
-+            }
-+            break;
++                                                      if (ct->xid[0] > 0) {
++                                                                      mark = ct->xid[0];
++                                                      }
++
++                                      }
++                                      else if (proto == 6) /* TCP */{
++                                                      int sockettype=0; /* Established socket */
++                                                      /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in
++                                                       * that order. The order is important for Codemux connections to be handled properly */
++
++                                                      connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++
++                                                      if (!connection_sk) {
++                                                              connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif);
++                                                              sockettype=1; /* Listening socket */
++                                                      }
++
++                                                      if (connection_sk) {
++                                                                      /* The peercred is not set. We set it if the other side has an xid. */
++                                                                      if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
++                                                                                                      && ct->xid[!dir]>0 && (sockettype==0)) {
++                                                                                      connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
++                                                                      }
++
++                                                                      /* The peercred is set, and is not equal to the XID of 'the other side' */
++                                                                      else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) {
++                                                                                      mark = connection_sk->sk_peercred.uid;
++                                                                      }
++
++                                                                      /* Has this connection already been tagged? */
++                                                                      if (ct->xid[dir] < 1) {
++                                                                                      /* No - let's tag it */ 
++                                                                                      ct->xid[dir]=connection_sk->sk_nid;
++
++                                                                      }
++
++                                                                      if (mark==-1 && (ct->xid[dir]!= 0))
++                                                                                      mark = ct->xid[dir];
++
++                                                                      if (connection_sk->sk_state == TCP_TIME_WAIT) {
++                                                                                      inet_twsk_put(inet_twsk(connection_sk));
++                                                                                      break;
++                                                                      }
++                                                                      else
++                                                                                      sock_put(connection_sk);
++                                                      }
++
++                                                      /* All else failed. Is this a connection over raw sockets? That explains
++                                                       * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
++                                                      if (ct->xid[dir]<1) {
++                                                                      if ((*pskb)->skb_tag) {
++                                                                                      ct->xid[dir]=(*pskb)->skb_tag;
++                                                                      }
++                                                      }
++
++                                                      /* Covers CoDemux case */
++                                                      if (mark < 1 && (ct->xid[dir]>0)) {
++                                                                      mark = ct->xid[dir];
++                                                      }
++
++                                                      if (mark < 1 && (ct->xid[!dir]>0)) {
++                                                                      mark = ct->xid[!dir];
++                                                      }
++                                                      break;
++                                      }
 +      }
 +      if (mark != -1) {
        (*pskb)->mark = mark;
@@ -393,7 +323,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static int
  checkentry_v0(const char *tablename,
              const void *entry,
-@@ -92,7 +322,8 @@
+@@ -92,7 +252,8 @@
  
        if (markinfo->mode != XT_MARK_SET
            && markinfo->mode != XT_MARK_AND