The previous commit lost a hunk that I checked in in the one before that. The two...
author Sapan Bhatia <sapanb@cs.princeton.edu>
Wed, 17 Sep 2008 22:26:40 +0000 (22:26 +0000)
committer Sapan Bhatia <sapanb@cs.princeton.edu>
Wed, 17 Sep 2008 22:26:40 +0000 (22:26 +0000)
[... the first] applies to UDP, the second to TCP, so this should not entail a full test cycle. I'm going to build the newest version
of the kernel and install it on planetlab-7 anyhow.

linux-2.6-522-iptables-connection-tagging.patch

index 881c435..17c807d 100644 (file)
@@ -1,6 +1,6 @@
 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_MARK.h linux-2.6.22-522/include/linux/netfilter/xt_MARK.h
 --- linux-2.6.22-521/include/linux/netfilter/xt_MARK.h 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/include/linux/netfilter/xt_MARK.h 2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/include/linux/netfilter/xt_MARK.h 2008-09-17 17:59:53.000000000 -0400
 @@ -11,6 +11,7 @@
        XT_MARK_SET=0,
        XT_MARK_AND,
@@ -11,7 +11,7 @@ diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_MARK.h linux-2.6.22-522/i
  struct xt_mark_target_info_v1 {
 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h
 --- linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h       1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h       2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h       2008-09-17 17:59:53.000000000 -0400
 @@ -0,0 +1,14 @@
 +#ifndef _XT_SETXID_H_target
 +#define _XT_SETXID_H_target
@@ -29,7 +29,7 @@ diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h linux-2.6.22-522
 +#endif /*_XT_SETXID_H_target*/
 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h
 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h   2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h   2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h   2008-09-17 17:59:53.000000000 -0400
 @@ -12,6 +12,7 @@
  #define IPT_MARK_SET  XT_MARK_SET
  #define IPT_MARK_AND  XT_MARK_AND
@@ -40,7 +40,7 @@ diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22
  
 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h
 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h 1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h 2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h 2008-09-17 17:59:53.000000000 -0400
 @@ -0,0 +1,13 @@
 +#ifndef _IPT_SETXID_H_target
 +#define _IPT_SETXID_H_target
@@ -57,7 +57,7 @@ diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.
 +#endif /*_IPT_SETXID_H_target*/
 diff -Nurb linux-2.6.22-521/include/net/netfilter/nf_conntrack.h linux-2.6.22-522/include/net/netfilter/nf_conntrack.h
 --- linux-2.6.22-521/include/net/netfilter/nf_conntrack.h      2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/include/net/netfilter/nf_conntrack.h      2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/include/net/netfilter/nf_conntrack.h      2008-09-17 17:59:53.000000000 -0400
 @@ -131,6 +131,9 @@
        /* Storage reserved for other modules: */
        union nf_conntrack_proto proto;
@@ -70,7 +70,7 @@ diff -Nurb linux-2.6.22-521/include/net/netfilter/nf_conntrack.h linux-2.6.22-52
  };
 diff -Nurb linux-2.6.22-521/net/netfilter/Kconfig linux-2.6.22-522/net/netfilter/Kconfig
 --- linux-2.6.22-521/net/netfilter/Kconfig     2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/Kconfig     2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/net/netfilter/Kconfig     2008-09-17 17:59:53.000000000 -0400
 @@ -389,6 +389,13 @@
  
          To compile it as a module, choose M here.  If unsure, say N.
@@ -87,7 +87,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/Kconfig linux-2.6.22-522/net/netfilter
        depends on NETFILTER_XTABLES
 diff -Nurb linux-2.6.22-521/net/netfilter/Makefile linux-2.6.22-522/net/netfilter/Makefile
 --- linux-2.6.22-521/net/netfilter/Makefile    2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/Makefile    2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/net/netfilter/Makefile    2008-09-17 17:59:53.000000000 -0400
 @@ -37,6 +37,7 @@
  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
  
@@ -98,7 +98,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/Makefile linux-2.6.22-522/net/netfilte
  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/net/netfilter/nf_conntrack_core.c
 --- linux-2.6.22-521/net/netfilter/nf_conntrack_core.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/nf_conntrack_core.c 2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/net/netfilter/nf_conntrack_core.c 2008-09-17 17:59:53.000000000 -0400
 @@ -726,6 +726,8 @@
  
        /* Overload tuple linked list to put us in unconfirmed list. */
@@ -110,8 +110,8 @@ diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/n
  
 diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c
 --- linux-2.6.22-521/net/netfilter/xt_MARK.c   2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-09-14 16:50:22.000000000 -0400
-@@ -5,13 +5,18 @@
++++ linux-2.6.22-522/net/netfilter/xt_MARK.c   2008-09-17 18:29:52.000000000 -0400
+@@ -5,13 +5,19 @@
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
@@ -122,6 +122,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
 +#include <linux/version.h>
  #include <linux/skbuff.h>
  #include <linux/ip.h>
++#include <net/udp.h>
  #include <net/checksum.h>
 +#include <net/route.h>
 +#include <net/inet_hashtables.h>
@@ -130,7 +131,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  #include <linux/netfilter/x_tables.h>
  #include <linux/netfilter/xt_MARK.h>
  
-@@ -21,6 +26,50 @@
+@@ -21,6 +27,50 @@
  MODULE_ALIAS("ipt_MARK");
  MODULE_ALIAS("ip6t_MARK");
  
@@ -181,141 +182,218 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static unsigned int
  target_v0(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -35,6 +84,10 @@
+@@ -35,6 +85,68 @@
        return XT_CONTINUE;
  }
  
 +extern DEFINE_PER_CPU(int, sknid_elevator);
 +
++static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
++                      __be32 daddr, __be16 dport,
++                      int dif, struct hlist_head udptable[])
++{
++    struct sock *sk, *result = NULL;
++    struct hlist_node *node;
++    unsigned short hnum = ntohs(dport);
++    int badness = -1;
++
++    read_lock(&udp_hash_lock);
++
++    sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
++        struct inet_sock *inet = inet_sk(sk);
++
++        if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
++            int score = (sk->sk_family == PF_INET ? 1 : 0);
++
++            if (inet->rcv_saddr) {
++                if (inet->rcv_saddr != daddr)
++                    continue;
++                score+=2;
++            } else {
++                /* block non nx_info ips */
++                if (!v4_addr_in_nx_info(sk->sk_nx_info,
++                    daddr, NXA_MASK_BIND))
++                    continue;
++            }
++            if (inet->daddr) {
++                if (inet->daddr != saddr)
++                    continue;
++                score+=2;
++            }
++            if (inet->dport) {
++                if (inet->dport != sport)
++                    continue;
++                score+=2;
++            }
++            if (sk->sk_bound_dev_if) {
++                if (sk->sk_bound_dev_if != dif)
++                    continue;
++                score+=2;
++            }
++            if (score == 9) {
++                result = sk;
++                break;
++            } else if (score > badness) {
++                result = sk;
++                badness = score;
++            }
++        }
++    }
++
++    if (result)
++        sock_hold(result);
++    read_unlock(&udp_hash_lock);
++    return result;
++}
++
 +#define related(ct) (ct==(IP_CT_IS_REPLY + IP_CT_RELATED))
 +
  static unsigned int
  target_v1(struct sk_buff **pskb,
          const struct net_device *in,
-@@ -44,7 +97,20 @@
+@@ -44,7 +156,20 @@
          const void *targinfo)
  {
        const struct xt_mark_target_info_v1 *markinfo = targinfo;
 -      int mark = 0;
-+      enum ip_conntrack_info ctinfo;
-+      struct sock *connection_sk;
-+      int dif;
-+      struct nf_conn *ct;
-+      extern struct inet_hashinfo tcp_hashinfo;
-+      enum ip_conntrack_dir dir;
-+      int *curtag;
-+      u_int32_t src_ip;
-+      u_int32_t dst_ip;
-+      u_int16_t proto, src_port;
-+      u_int32_t ip;
-+      u_int16_t port;
-+
-+      int mark = -1;
++              enum ip_conntrack_info ctinfo;
++              struct sock *connection_sk;
++              int dif;
++              struct nf_conn *ct;
++              extern struct inet_hashinfo tcp_hashinfo;
++              enum ip_conntrack_dir dir;
++              int *curtag;
++              u_int32_t src_ip;
++              u_int32_t dst_ip;
++              u_int16_t proto, src_port;
++              u_int32_t ip;
++              u_int16_t port;
++
++              int mark = -1;
  
        switch (markinfo->mode) {
        case XT_MARK_SET:
-@@ -58,13 +124,107 @@
+@@ -58,13 +183,126 @@
        case XT_MARK_OR:
                mark = (*pskb)->mark | markinfo->mark;
                break;
 +
-+                      case XT_MARK_COPYXID: 
-+                                      dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
-+
-+                                      ct = nf_ct_get((*pskb), &ctinfo);
-+                                      if (!ct) 
-+                                                      break;
-+
-+                                      dir = CTINFO2DIR(ctinfo);
-+                                      src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
-+                                      dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+                                      src_port = get_src_port(&ct->tuplehash[dir].tuple);
-+                                      proto = ct->tuplehash[dir].tuple.dst.protonum;
-+
-+                                      ip = ct->tuplehash[dir].tuple.dst.u3.ip;
-+                                      port = get_dst_port(&ct->tuplehash[dir].tuple);
-+
-+                                      if (proto == 1 || proto == 17) {
-+                                                      if ((*pskb)->mark>0) /* The packet is marked, it's going out */
-+                                                      {
-+                                                                      ct->xid[0]=(*pskb)->mark;
++                              case XT_MARK_COPYXID: 
++                                              dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
++
++                                              ct = nf_ct_get((*pskb), &ctinfo);
++                                              if (!ct) 
++                                                              break;
++
++                                              dir = CTINFO2DIR(ctinfo);
++                                              src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
++                                              dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                              src_port = get_src_port(&ct->tuplehash[dir].tuple);
++                                              proto = ct->tuplehash[dir].tuple.dst.protonum;
++
++                                              ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                              port = get_dst_port(&ct->tuplehash[dir].tuple);
++
++                                              if (proto == 1) {
++                                                              if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++                                                              {
++                                                                              ct->xid[0]=(*pskb)->mark;
++                                                              }
++
++                                                              if (ct->xid[0] > 0) {
++                                                                              mark = ct->xid[0];
++                                                              }
++                                              }
++                                              else if (proto == 17) {
++                                                              struct sock *sk;
++                                                              if (!(*pskb)->mark) {
++                                                                              sk = __udp4_lib_lookup(src_ip, src_port, ip, port,
++                                                                                                              dif, udp_hash);
++
++                                                                              if (sk && hooknum==NF_IP_LOCAL_IN) {
++                                                                                              mark=sk->sk_nid;
++                                                                              }
++
++                                                                              if (sk) {
++                                                                                              sock_put(sk);
++                                                                              }
++                                                              }
++                                                              else
++                                                                              if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++                                                                              {
++                                                                                              ct->xid[0]=(*pskb)->mark;
++                                                                              }
++                                              }
++                                              else if (proto == 6) /* TCP */{
++                                                              int sockettype=0; /* Established socket */
++                                                              /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in
++                                                               * that order. The order is important for Codemux connections to be handled properly */
++
++                                                              connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++
++                                                              if (!connection_sk) {
++                                                                              connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif);
++                                                                              sockettype=1; /* Listening socket */
++                                                              }
++
++                                                              if (connection_sk) {
++                                                                              /* The peercred is not set. We set it if the other side has an xid. */
++                                                                              if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
++                                                                                                              && ct->xid[!dir]>0 && (sockettype==0)) {
++                                                                                              connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
++                                                                              }
++
++                                                                              /* The peercred is set, and is not equal to the XID of 'the other side' */
++                                                                              else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) {
++                                                                                              mark = connection_sk->sk_peercred.uid;
++                                                                              }
++
++                                                                              /* Has this connection already been tagged? */
++                                                                              if (ct->xid[dir] < 1) {
++                                                                                              /* No - let's tag it */ 
++                                                                                              ct->xid[dir]=connection_sk->sk_nid;
++
++                                                                              }
++
++                                                                              if (mark==-1 && (ct->xid[dir]!= 0))
++                                                                                              mark = ct->xid[dir];
++
++                                                                              if (connection_sk->sk_state == TCP_TIME_WAIT) {
++                                                                                              inet_twsk_put(inet_twsk(connection_sk));
++                                                                                              break;
++                                                                              }
++                                                                              else
++                                                                                              sock_put(connection_sk);
        }
  
-+                                                      if (ct->xid[0] > 0) {
-+                                                                      mark = ct->xid[0];
-+                                                      }
-+
-+                                      }
-+                                      else if (proto == 6) /* TCP */{
-+                                                      int sockettype=0; /* Established socket */
-+                                                      /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in
-+                                                       * that order. The order is important for Codemux connections to be handled properly */
-+
-+                                                      connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
-+
-+                                                      if (!connection_sk) {
-+                                                              connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif);
-+                                                              sockettype=1; /* Listening socket */
-+                                                      }
-+
-+                                                      if (connection_sk) {
-+                                                                      /* The peercred is not set. We set it if the other side has an xid. */
-+                                                                      if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
-+                                                                                                      && ct->xid[!dir]>0 && (sockettype==0)) {
-+                                                                                      connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
-+                                                                      }
-+
-+                                                                      /* The peercred is set, and is not equal to the XID of 'the other side' */
-+                                                                      else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) {
-+                                                                                      mark = connection_sk->sk_peercred.uid;
-+                                                                      }
-+
-+                                                                      /* Has this connection already been tagged? */
-+                                                                      if (ct->xid[dir] < 1) {
-+                                                                                      /* No - let's tag it */ 
-+                                                                                      ct->xid[dir]=connection_sk->sk_nid;
-+
-+                                                                      }
-+
-+                                                                      if (mark==-1 && (ct->xid[dir]!= 0))
-+                                                                                      mark = ct->xid[dir];
-+
-+                                                                      if (connection_sk->sk_state == TCP_TIME_WAIT) {
-+                                                                                      inet_twsk_put(inet_twsk(connection_sk));
-+                                                                                      break;
-+                                                                      }
-+                                                                      else
-+                                                                                      sock_put(connection_sk);
-+                                                      }
-+
-+                                                      /* All else failed. Is this a connection over raw sockets? That explains
-+                                                       * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
-+                                                      if (ct->xid[dir]<1) {
-+                                                                      if ((*pskb)->skb_tag) {
-+                                                                                      ct->xid[dir]=(*pskb)->skb_tag;
-+                                                                      }
-+                                                      }
-+
-+                                                      /* Covers CoDemux case */
-+                                                      if (mark < 1 && (ct->xid[dir]>0)) {
-+                                                                      mark = ct->xid[dir];
-+                                                      }
-+
-+                                                      if (mark < 1 && (ct->xid[!dir]>0)) {
-+                                                                      mark = ct->xid[!dir];
-+                                                      }
-+                                                      break;
-+                                      }
-+      }
-+      if (mark != -1) {
++                                                              /* All else failed. Is this a connection over raw sockets? That explains
++                                                               * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
++                                                              if (ct->xid[dir]<1) {
++                                                                              if ((*pskb)->skb_tag) {
++                                                                                              ct->xid[dir]=(*pskb)->skb_tag;
++                                                                              }
++                                                              }
++
++                                                              /* Covers CoDemux case */
++                                                              if (mark < 1 && (ct->xid[dir]>0)) {
++                                                                              mark = ct->xid[dir];
++                                                              }
++
++                                                              if (mark < 1 && (ct->xid[!dir]>0)) {
++                                                                              mark = ct->xid[!dir];
++                                                              }
++                                                              break;
++                                              }
++              }
++              if (mark != -1) {
        (*pskb)->mark = mark;
-+      }
++              }
 +
-+      curtag=&__get_cpu_var(sknid_elevator);
-+      if (mark > 0 && *curtag==-2 && hooknum==NF_IP_LOCAL_IN) 
-+      {
-+              *curtag = mark;
-+      }
++              curtag=&__get_cpu_var(sknid_elevator);
++              if (mark > 0 && *curtag==-2 && hooknum==NF_IP_LOCAL_IN) 
++              {
++                              *curtag = mark;
++              }
        return XT_CONTINUE;
  }
  
@@ -323,7 +401,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
  static int
  checkentry_v0(const char *tablename,
              const void *entry,
-@@ -92,7 +252,8 @@
+@@ -92,7 +330,8 @@
  
        if (markinfo->mode != XT_MARK_SET
            && markinfo->mode != XT_MARK_AND
@@ -335,7 +413,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt
                return 0;
 diff -Nurb linux-2.6.22-521/net/netfilter/xt_SETXID.c linux-2.6.22-522/net/netfilter/xt_SETXID.c
 --- linux-2.6.22-521/net/netfilter/xt_SETXID.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.6.22-522/net/netfilter/xt_SETXID.c 2008-07-28 16:36:24.000000000 -0400
++++ linux-2.6.22-522/net/netfilter/xt_SETXID.c 2008-09-17 17:59:53.000000000 -0400
 @@ -0,0 +1,79 @@
 +#include <linux/module.h>
 +#include <linux/skbuff.h>