* (Cleanup) Split up the VNET+ patch into its 5 component patches
author Sapan Bhatia <sapanb@cs.princeton.edu>
Wed, 9 Jul 2008 16:36:33 +0000 (16:36 +0000)
committer Sapan Bhatia <sapanb@cs.princeton.edu>
Wed, 9 Jul 2008 16:36:33 +0000 (16:36 +0000)
* Ran a consistency check on the split-up to make sure we didn't lose anything

(to continue...)

kernel-2.6.spec
linux-2.6-521-packet-tagging.patch [new file with mode: 0644]
linux-2.6-522-iptables-connection-tagging.patch [new file with mode: 0644]
linux-2.6-523-raw-sockets.patch [new file with mode: 0644]
linux-2.6-524-peercred.patch [new file with mode: 0644]
linux-2.6-525-sknid-elevator.patch [new file with mode: 0644]

index 0cd576b..536dcf4 100644 (file)
@@ -163,6 +163,11 @@ Patch250: linux-2.6-250-ipsets.patch
 Patch500: linux-2.6-500-vserver-filesharing.patch
 Patch510: linux-2.6-510-ipod.patch
 Patch520: linux-2.6-520-vnet+.patch
+Patch521: linux-2.6-521-packet-tagging.patch
+Patch522: linux-2.6-522-iptables-connection-tagging.patch
+Patch523: linux-2.6-523-raw-sockets.patch
+Patch524: linux-2.6-524-peercred.patch
+Patch525: linux-2.6-525-sknid-elevator.patch
 Patch530: linux-2.6-530-built-by-support.patch
 Patch540: linux-2.6-540-oom-kill.patch
 Patch550: linux-2.6-550-raise-default-nfile-ulimit.patch
@@ -357,7 +362,14 @@ KERNEL_PREVIOUS=vanilla
 
 %ApplyPatch 500
 %ApplyPatch 510
-%ApplyPatch 520
+
+# VNET+ series
+%ApplyPatch 521
+%ApplyPatch 522
+%ApplyPatch 523
+%ApplyPatch 524
+%ApplyPatch 525
+
 %ApplyPatch 530
 %ApplyPatch 540
 %ApplyPatch 550
diff --git a/linux-2.6-521-packet-tagging.patch b/linux-2.6-521-packet-tagging.patch
new file mode 100644 (file)
index 0000000..5fcc1cf
--- /dev/null
@@ -0,0 +1,72 @@
+diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h
+--- linux-2.6.22-510/include/linux/skbuff.h    2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/skbuff.h    2008-06-06 17:07:56.000000000 -0400
+@@ -302,6 +302,7 @@
+ #endif
+       __u32                   mark;
++#define skb_tag                       mark
+       sk_buff_data_t          transport_header;
+       sk_buff_data_t          network_header;
+diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c
+--- linux-2.6.22-510/net/core/skbuff.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/core/skbuff.c 2008-06-06 17:07:56.000000000 -0400
+@@ -56,6 +56,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/init.h>
+ #include <linux/scatterlist.h>
++#include <linux/vs_network.h>
+ #include <net/protocol.h>
+ #include <net/dst.h>
+@@ -174,6 +175,7 @@
+       skb->data = data;
+       skb_reset_tail_pointer(skb);
+       skb->end = skb->tail + size;
++      if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0;
+       /* make sure we initialize shinfo sequentially */
+       shinfo = skb_shinfo(skb);
+       atomic_set(&shinfo->dataref, 1);
+@@ -443,6 +445,8 @@
+       C(tail);
+       C(end);
++      /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */
++
+       atomic_inc(&(skb_shinfo(skb)->dataref));
+       skb->cloned = 1;
+@@ -492,6 +496,7 @@
+       new->tc_index   = old->tc_index;
+ #endif
+       skb_copy_secmark(new, old);
++      new->skb_tag = old->skb_tag;
+       atomic_set(&new->users, 1);
+       skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+       skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
+--- linux-2.6.22-510/net/ipv4/af_inet.c        2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/af_inet.c        2008-06-06 17:07:56.000000000 -0400
+@@ -178,6 +178,8 @@
+                       return -EAGAIN;
+               }
+               inet->sport = htons(inet->num);
++              sk->sk_xid = vx_current_xid();
++              if (!in_interrupt()) sk->sk_nid = nx_current_nid(); else sk->sk_nid=0;
+       }
+       release_sock(sk);
+       return 0;
+diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c
+--- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c      2008-06-06 17:07:43.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c      2008-06-06 17:07:56.000000000 -0400
+@@ -49,6 +49,8 @@
+       else
+               logflags = NF_LOG_MASK;
++      /* skb_tag is an alias for the __u32 skb->mark, so log it as unsigned. */
++      printk("TAG=%u ", skb->skb_tag);
++
+       ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+       if (ih == NULL) {
+               printk("TRUNCATED");
+
diff --git a/linux-2.6-522-iptables-connection-tagging.patch b/linux-2.6-522-iptables-connection-tagging.patch
new file mode 100644 (file)
index 0000000..a3f28a0
--- /dev/null
@@ -0,0 +1,382 @@
+diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h
+--- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h 2008-06-06 17:07:56.000000000 -0400
+@@ -11,6 +11,7 @@
+       XT_MARK_SET=0,
+       XT_MARK_AND,
+       XT_MARK_OR,
++      XT_MARK_COPYXID,
+ };
+ struct xt_mark_target_info_v1 {
+diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h
+--- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h       1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h       2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,14 @@
++#ifndef _XT_SETXID_H_target
++#define _XT_SETXID_H_target
++
++/* Definitions for the "SETXID" xtables target (see xt_SETXID.c). */
++
++/* Version 1 */
++/* Operating modes accepted in xt_setxid_target_info_v1.mode. */
++enum {
++      XT_SET_PACKET_XID=0
++};
++
++/*
++ * Per-rule parameters for revision 1 of the target.
++ *
++ * NOTE(review): `mark' is an unsigned long, so the structure size presumably
++ * differs between 32-bit and 64-bit builds -- confirm this matches what the
++ * userspace side passes in.
++ */
++struct xt_setxid_target_info_v1 {
++      unsigned long mark;     /* value the target stores in the packet's skb_tag */
++      u_int8_t mode;          /* one of the XT_SET_* modes above */
++};
++
++#endif /*_XT_SETXID_H_target*/
+diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h
+--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h   2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h   2008-06-06 17:07:56.000000000 -0400
+@@ -12,6 +12,7 @@
+ #define IPT_MARK_SET  XT_MARK_SET
+ #define IPT_MARK_AND  XT_MARK_AND
+ #define       IPT_MARK_OR     XT_MARK_OR
++#define IPT_MARK_COPYXID      XT_MARK_COPYXID
+ #define ipt_mark_target_info_v1 xt_mark_target_info_v1
+diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h
+--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h 2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,13 @@
++#ifndef _IPT_SETXID_H_target
++#define _IPT_SETXID_H_target
++
++/*
++ * Backwards compatibility for old userspace: every ipt_ / IPT_ name below
++ * is a plain alias for the corresponding xt_ / XT_ name from xt_SETXID.h.
++ */
++
++#include <linux/netfilter/xt_SETXID.h>
++
++/* Version 1 */
++#define IPT_SET_PACKET_XID    XT_SET_PACKET_XID
++
++#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
++
++#endif /*_IPT_SETXID_H_target*/
+diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h
+--- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h      2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h      2008-06-06 17:07:56.000000000 -0400
+@@ -131,6 +131,9 @@
+       /* Storage reserved for other modules: */
+       union nf_conntrack_proto proto;
++      /* PLANETLAB. VNET-specific */
++      int xid[IP_CT_DIR_MAX];
++
+       /* features dynamically at the end: helper, nat (both optional) */
+       char data[0];
+ };
+diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig
+--- linux-2.6.22-510/net/netfilter/Kconfig     2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/Kconfig     2008-06-06 17:07:56.000000000 -0400
+@@ -389,6 +389,13 @@
+         To compile it as a module, choose M here.  If unsure, say N.
++config NETFILTER_XT_TARGET_SETXID
++      tristate '"SETXID" target support'
++      depends on NETFILTER_XTABLES
++      help
++        This option adds a `SETXID' target, which allows you to alter the
++        xid of a socket.
++
+ config NETFILTER_XT_MATCH_COMMENT
+       tristate  '"comment" match support'
+       depends on NETFILTER_XTABLES
+diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile
+--- linux-2.6.22-510/net/netfilter/Makefile    2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/Makefile    2008-06-06 17:07:56.000000000 -0400
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+ # targets
++obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c
+--- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c 2008-06-06 17:07:56.000000000 -0400
+@@ -726,6 +726,8 @@
+       /* Overload tuple linked list to put us in unconfirmed list. */
+       list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
++      conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
++      conntrack->xid[IP_CT_DIR_REPLY] = -1;
+       write_unlock_bh(&nf_conntrack_lock);
+
+diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c
+--- linux-2.6.22-510/net/netfilter/xt_MARK.c   2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/xt_MARK.c   2008-06-07 17:55:26.000000000 -0400
+@@ -5,13 +5,18 @@
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License version 2 as
+  * published by the Free Software Foundation.
++ *
+  */
+ #include <linux/module.h>
++#include <linux/version.h>
+ #include <linux/skbuff.h>
+ #include <linux/ip.h>
+ #include <net/checksum.h>
++#include <net/route.h>
++#include <net/inet_hashtables.h>
++#include <net/netfilter/nf_conntrack.h>
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter/xt_MARK.h>
+@@ -21,6 +26,48 @@
+ MODULE_ALIAS("ipt_MARK");
+ MODULE_ALIAS("ip6t_MARK");
++/*
++ * Pick the 16-bit value that plays the role of "destination port" for a
++ * conntrack tuple.  The protocol is taken from tuple->dst.protonum:
++ *   GRE      - the destination GRE key
++ *   ICMP     - the echo ID, which is stored on the source side of the tuple
++ *   TCP/UDP  - the destination port
++ *   other    - the generic tuple->dst.u.all field
++ * The value is returned as stored in the tuple; no byte-order conversion is
++ * applied except in the pre-2.6.11 GRE branch.
++ */
++static inline u_int16_t
++get_dst_port(struct nf_conntrack_tuple *tuple)
++{
++      u_int16_t port;
++
++      if (tuple->dst.protonum == IPPROTO_GRE) {
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
++              port = tuple->dst.u.gre.key;
++#else
++              /* XXX Truncate 32-bit GRE key to 16 bits */
++              port = htons(ntohl(tuple->dst.u.gre.key));
++#endif
++      } else if (tuple->dst.protonum == IPPROTO_ICMP) {
++              /* Bind on ICMP echo ID */
++              port = tuple->src.u.icmp.id;
++      } else if (tuple->dst.protonum == IPPROTO_TCP) {
++              port = tuple->dst.u.tcp.port;
++      } else if (tuple->dst.protonum == IPPROTO_UDP) {
++              port = tuple->dst.u.udp.port;
++      } else {
++              port = tuple->dst.u.all;
++      }
++
++      return port;
++}
++
++/*
++ * Pick the 16-bit value that plays the role of "source port" for a
++ * conntrack tuple.  As in get_dst_port(), the protocol is read from
++ * tuple->dst.protonum:
++ *   GRE      - the source GRE key
++ *   ICMP     - the echo ID
++ *   TCP/UDP  - the source port
++ *   other    - the generic tuple->src.u.all field
++ * The value is returned as stored in the tuple.
++ */
++static inline u_int16_t
++get_src_port(struct nf_conntrack_tuple *tuple)
++{
++      switch (tuple->dst.protonum) {
++      case IPPROTO_GRE:
++              /*
++               * Use the same version guard as get_dst_port().  That
++               * function returns the key unconverted on kernels >= 2.6.11,
++               * which implies the key is already 16 bits wide there;
++               * running such a key through htons(ntohl()) would yield 0 on
++               * little-endian hosts.
++               * NOTE(review): confirm the key width against
++               * nf_conntrack_tuple.h.
++               */
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
++              return tuple->src.u.gre.key;
++#else
++              /* XXX Truncate 32-bit GRE key to 16 bits */
++              return htons(ntohl(tuple->src.u.gre.key));
++#endif
++      case IPPROTO_ICMP:
++              /* Bind on ICMP echo ID */
++              return tuple->src.u.icmp.id;
++      case IPPROTO_TCP:
++              return tuple->src.u.tcp.port;
++      case IPPROTO_UDP:
++              return tuple->src.u.udp.port;
++      default:
++              return tuple->src.u.all;
++      }
++}
++
+ static unsigned int
+ target_v0(struct sk_buff **pskb,
+         const struct net_device *in,
+@@ -35,6 +82,8 @@
+       return XT_CONTINUE;
+ }
++/*
++ * Per-CPU tag slot shared with the packet receive path; target_v1() below
++ * writes the computed mark into it.  This file only refers to the variable,
++ * so use the declaration macro rather than the definition macro.
++ */
++DECLARE_PER_CPU(int, sknid_elevator);
++
+ static unsigned int
+ target_v1(struct sk_buff **pskb,
+         const struct net_device *in,
+@@ -44,7 +93,20 @@
+         const void *targinfo)
+ {
+       const struct xt_mark_target_info_v1 *markinfo = targinfo;
+-      int mark = 0;
++      enum ip_conntrack_info ctinfo;
++      struct sock *connection_sk;
++      int dif;
++      struct nf_conn *ct;
++      extern struct inet_hashinfo tcp_hashinfo;
++      enum ip_conntrack_dir dir;
++      int *curtag;
++      u_int32_t src_ip;
++      u_int32_t dst_ip;
++      u_int16_t proto, src_port;
++      u_int32_t ip;
++      u_int16_t port;
++
++      int mark = -1;
+       switch (markinfo->mode) {
+       case XT_MARK_SET:
+@@ -58,13 +120,74 @@
+       case XT_MARK_OR:
+               mark = (*pskb)->mark | markinfo->mark;
+               break;
++
++              case XT_MARK_COPYXID: 
++                                            
++                                            ct = nf_ct_get((*pskb), &ctinfo);
++                                            if (!ct) 
++                                                    break;
++
++                                            dir = CTINFO2DIR(ctinfo);
++                                            src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
++                                            dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                            src_port = get_src_port(&ct->tuplehash[dir].tuple);
++                                            proto = ct->tuplehash[dir].tuple.dst.protonum;
++
++                                            dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
++                                            ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++                                            port = get_dst_port(&ct->tuplehash[dir].tuple);
++
++                                            if (proto == 1 || proto == 17) {
++                                                    if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++                                                    {
++                                                              //if (ct->xid[0]>0 && ct->xid[0]!=(*pskb)->mark)
++                                                                      /*printk(KERN_CRIT "xt_MARK log: %d/%d/%d/%d\n",ct->xid[0],(*pskb)->mark,hooknum==NF_IP_LOCAL_IN,proto);*/
++
++                                                              ct->xid[0]=(*pskb)->mark;
+       }
++                                                    if (ct->xid[0] > 0) {
++                                                            mark = ct->xid[0];
++                                                    }
++
++                                            }
++                                            else if (proto == 6) { 
++                                                    if ((*pskb)->sk) {
++                                                            connection_sk = (*pskb)->sk;
++                                                            sock_hold(connection_sk);
++                                                    }
++                                                    else 
++                                                            connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++                                                            
++
++                                                    if (connection_sk) {
++                                                            if (connection_sk->sk_state == TCP_TIME_WAIT) {
++                                                                    inet_twsk_put(inet_twsk(connection_sk));
++                                                                    break;
++                                                            }
++                                                            connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
++                                                            ct->xid[!dir]=connection_sk->sk_nid;
++                                                            if (connection_sk->sk_nid != 0) 
++                                                                    mark = connection_sk->sk_nid;
++                                                            sock_put(connection_sk);
++                                                    }
++                                                    else 
++                                                            mark = -1 ; 
++                                            }
++                                            break;
++      }
++      if (mark != -1) {
+       (*pskb)->mark = mark;
++      }
++
++      curtag=&__get_cpu_var(sknid_elevator);
++      if (mark > 0 && *curtag==-2) 
++      {
++              *curtag = mark;
++      }
+       return XT_CONTINUE;
+ }
+-
+ static int
+ checkentry_v0(const char *tablename,
+             const void *entry,
+@@ -92,7 +215,8 @@
+       if (markinfo->mode != XT_MARK_SET
+           && markinfo->mode != XT_MARK_AND
+-          && markinfo->mode != XT_MARK_OR) {
++          && markinfo->mode != XT_MARK_OR
++          && markinfo->mode != XT_MARK_COPYXID) {
+               printk(KERN_WARNING "MARK: unknown mode %u\n",
+                      markinfo->mode);
+               return 0;
+diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c
+--- linux-2.6.22-510/net/netfilter/xt_SETXID.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/net/netfilter/xt_SETXID.c 2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,79 @@
++#include <linux/module.h>
++#include <linux/skbuff.h>
++#include <linux/ip.h>
++#include <net/checksum.h>
++#include <linux/vs_network.h>
++
++#include <linux/netfilter/x_tables.h>
++#include <linux/netfilter/xt_SETXID.h>
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("");
++MODULE_DESCRIPTION("");
++MODULE_ALIAS("ipt_SETXID");
++
++/*
++ * SETXID target, revision 1.
++ *
++ * In XT_SET_PACKET_XID mode the rule's `mark' value is stored in the
++ * packet's skb_tag.  Any other mode leaves the packet untouched.  The
++ * verdict is always XT_CONTINUE.
++ */
++static unsigned int
++target_v1(struct sk_buff **pskb,
++        const struct net_device *in,
++        const struct net_device *out,
++        unsigned int hooknum,
++        const struct xt_target *target,
++        const void *targinfo)
++{
++      const struct xt_setxid_target_info_v1 *info = targinfo;
++
++      if (info->mode == XT_SET_PACKET_XID)
++              (*pskb)->skb_tag = info->mark;
++
++      return XT_CONTINUE;
++}
++
++
++/*
++ * Rule validation for SETXID revision 1.
++ *
++ * Returns 1 when the requested mode is XT_SET_PACKET_XID; otherwise logs a
++ * warning and returns 0.
++ */
++static int
++checkentry_v1(const char *tablename,
++            const void *entry,
++            const struct xt_target *target,
++            void *targinfo,
++            unsigned int hook_mask)
++{
++      const struct xt_setxid_target_info_v1 *info = targinfo;
++
++      if (info->mode == XT_SET_PACKET_XID)
++              return 1;
++
++      printk(KERN_WARNING "SETXID: unknown mode %u\n",
++             info->mode);
++      return 0;
++}
++
++/* Targets registered by this module: "SETXID" revision 1, AF_INET, mangle table. */
++static struct xt_target xt_setxid_target[] = {
++      {
++              /* identity */
++              .name           = "SETXID",
++              .revision       = 1,
++              .family         = AF_INET,
++              .table          = "mangle",
++              /* handlers and payload size */
++              .target         = target_v1,
++              .checkentry     = checkentry_v1,
++              .targetsize     = sizeof(struct xt_setxid_target_info_v1),
++              .me             = THIS_MODULE,
++      }
++};
++
++/* Module load: register every entry of xt_setxid_target; returns the result. */
++static int __init init(void)
++{
++      return xt_register_targets(xt_setxid_target,
++                                 ARRAY_SIZE(xt_setxid_target));
++}
++
++/* Module unload: unregister the targets registered by init(). */
++static void __exit fini(void)
++{
++      xt_unregister_targets(xt_setxid_target,
++                            ARRAY_SIZE(xt_setxid_target));
++}
++
++module_init(init);
++module_exit(fini);
+
diff --git a/linux-2.6-523-raw-sockets.patch b/linux-2.6-523-raw-sockets.patch
new file mode 100644 (file)
index 0000000..24b05bd
--- /dev/null
@@ -0,0 +1,160 @@
+diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h
+--- linux-2.6.22-510/include/linux/vserver/network.h   2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/include/linux/vserver/network.h   2008-06-06 17:07:56.000000000 -0400
+@@ -47,6 +47,8 @@
+ #define NXC_TUN_CREATE                0x00000001
+ #define NXC_RAW_ICMP          0x00000100
++#define NXC_RAW_SOCKET                0x00000200
++#define NXC_RAW_SEND          0x00000400
+ /* address types */
+diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h
+--- linux-2.6.22-510/include/net/raw.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/net/raw.h 2008-06-06 17:07:56.000000000 -0400
+@@ -36,7 +36,7 @@
+ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+                                   __be32 raddr, __be32 laddr,
+-                                  int dif);
++                                  int dif, int tag);
+ extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+
+diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c
+--- linux-2.6.22-510/net/core/sock.c   2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/core/sock.c   2008-06-06 17:07:56.000000000 -0400
+@@ -444,6 +444,19 @@
+               }
+               goto set_sndbuf;
++      case SO_SETXID:
++              if (current_vx_info()) {
++                      ret = -EPERM;
++                      break;
++              }
++              if (val < 0 || val > MAX_S_CONTEXT) {
++                      ret = -EINVAL;
++                      break;
++              }
++              sk->sk_xid = val;
++              sk->sk_nid = val;
++              break;
++
+       case SO_RCVBUF:
+               /* Don't error on this BSD doesn't and if you think
+                  about it this is right. Otherwise apps have to
+@@ -573,7 +586,7 @@
+               char devname[IFNAMSIZ];
+               /* Sorry... */
+-              if (!capable(CAP_NET_RAW)) {
++              if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+                       ret = -EPERM;
+                       break;
+               }
+diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
+--- linux-2.6.22-510/net/ipv4/af_inet.c        2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/af_inet.c        2008-06-06 17:07:56.000000000 -0400
+@@ -312,6 +314,9 @@
+       if ((protocol == IPPROTO_ICMP) &&
+               nx_capable(answer->capability, NXC_RAW_ICMP))
+               goto override;
++      if (sock->type == SOCK_RAW &&
++              nx_capable(answer->capability, NXC_RAW_SOCKET))
++              goto override;
+       if (answer->capability > 0 && !capable(answer->capability))
+               goto out_rcu_unlock;
+ override:
+diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c
+--- linux-2.6.22-510/net/ipv4/icmp.c   2008-06-06 17:07:55.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/icmp.c   2008-06-06 17:07:56.000000000 -0400
+@@ -709,7 +709,7 @@
+       if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
+               while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
+                                                iph->saddr,
+-                                               skb->dev->ifindex)) != NULL) {
++                                               skb->dev->ifindex, skb->skb_tag)) != NULL) {
+                       raw_err(raw_sk, skb, info);
+                       raw_sk = sk_next(raw_sk);
+                       iph = (struct iphdr *)skb->data;
+diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c
+--- linux-2.6.22-510/net/ipv4/ip_options.c     2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/ip_options.c     2008-06-06 17:07:56.000000000 -0400
+@@ -409,7 +409,7 @@
+                                       optptr[2] += 8;
+                                       break;
+                                     default:
+-                                      if (!skb && !capable(CAP_NET_RAW)) {
++                                      if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+                                               pp_ptr = optptr + 3;
+                                               goto error;
+                                       }
+@@ -445,7 +445,7 @@
+                               opt->router_alert = optptr - iph;
+                       break;
+                     case IPOPT_CIPSO:
+-                      if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
++                      if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) {
+                               pp_ptr = optptr;
+                               goto error;
+                       }
+@@ -458,7 +458,7 @@
+                     case IPOPT_SEC:
+                     case IPOPT_SID:
+                     default:
+-                      if (!skb && !capable(CAP_NET_RAW)) {
++                      if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+                               pp_ptr = optptr;
+                               goto error;
+                       }
+diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c
+--- linux-2.6.22-510/net/ipv4/raw.c    2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/raw.c    2008-06-06 17:07:56.000000000 -0400
+@@ -103,7 +103,7 @@
+ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+                            __be32 raddr, __be32 laddr,
+-                           int dif)
++                           int dif, int tag)
+ {
+       struct hlist_node *node;
+@@ -112,6 +112,7 @@
+               if (inet->num == num                                    &&
+                   !(inet->daddr && inet->daddr != raddr)              &&
++                  (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag)  &&
+                   v4_sock_addr_match(sk->sk_nx_info, inet, laddr)     &&
+                   !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+                       goto found; /* gotcha */
+@@ -161,7 +162,7 @@
+               goto out;
+       sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
+                            iph->saddr, iph->daddr,
+-                           skb->dev->ifindex);
++                           skb->dev->ifindex, skb->skb_tag);
+       while (sk) {
+               delivered = 1;
+@@ -174,7 +175,7 @@
+               }
+               sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
+                                    iph->saddr, iph->daddr,
+-                                   skb->dev->ifindex);
++                                   skb->dev->ifindex, skb->skb_tag);
+       }
+ out:
+       read_unlock(&raw_v4_lock);
+@@ -315,7 +316,7 @@
+       }
+       err = -EPERM;
+-      if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
++      if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) &&
+               sk->sk_nx_info &&
+               !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
+               goto error_free;
+
+
diff --git a/linux-2.6-524-peercred.patch b/linux-2.6-524-peercred.patch
new file mode 100644 (file)
index 0000000..3cc87e8
--- /dev/null
@@ -0,0 +1,14 @@
+diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h
+--- linux-2.6.22-510/include/linux/socket.h    2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/socket.h    2008-06-06 17:07:56.000000000 -0400
+@@ -288,6 +288,8 @@
+ #define SOL_TIPC      271
+ #define SOL_RXRPC     272
++#define SO_SETXID     SO_PEERCRED
++
+ /* IPX options */
+ #define IPX_TYPE      1
+
+
diff --git a/linux-2.6-525-sknid-elevator.patch b/linux-2.6-525-sknid-elevator.patch
new file mode 100644 (file)
index 0000000..0ff4d8a
--- /dev/null
@@ -0,0 +1,185 @@
+diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c
+--- linux-2.6.22-510/net/core/dev.c    2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/core/dev.c    2008-06-06 17:07:56.000000000 -0400
+@@ -1803,6 +1803,7 @@
+  * the ingress scheduler, you just cant add policies on ingress.
+  *
+  */
++
+ static int ing_filter(struct sk_buff *skb)
+ {
+       struct Qdisc *q;
+@@ -1832,13 +1833,20 @@
+ }
+ #endif
++/* The code already makes the assumption that packet handlers run
++ * sequentially on the same CPU. -Sapan */
++DEFINE_PER_CPU(int, sknid_elevator);
++
+ int netif_receive_skb(struct sk_buff *skb)
+ {
+       struct packet_type *ptype, *pt_prev;
+       struct net_device *orig_dev;
+       int ret = NET_RX_DROP;
++      int *cur_elevator=&__get_cpu_var(sknid_elevator);
+       __be16 type;
++      *cur_elevator = 0;
++
+       /* if we've gotten here through NAPI, check netpoll */
+       if (skb->dev->poll && netpoll_rx(skb))
+               return NET_RX_DROP;
+@@ -1873,8 +1881,9 @@
+       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+               if (!ptype->dev || ptype->dev == skb->dev) {
+-                      if (pt_prev)
++                      if (pt_prev) {
+                               ret = deliver_skb(skb, pt_prev, orig_dev);
++                      }
+                       pt_prev = ptype;
+               }
+       }
+@@ -1912,8 +1921,22 @@
+               }
+       }
++      /* We don't want the packet handlers to throw the packet away
++       * if we want the taps to treat it again - Sapan */
++      if (*cur_elevator) {
++              atomic_inc(&skb->users);
++      }
++
+       if (pt_prev) {
+               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++                      if (*cur_elevator > 0) {
++                              skb->skb_tag = *cur_elevator;
++                              list_for_each_entry_rcu(ptype, &ptype_all, list) {
++                                      if (!ptype->dev || ptype->dev == skb->dev) {
++                                                      ret = deliver_skb(skb, ptype, orig_dev);
++                                      }
++                              }
++                      }
+       } else {
+               kfree_skb(skb);
+               /* Jamal, now you will not able to escape explaining
+@@ -1922,6 +1945,13 @@
+               ret = NET_RX_DROP;
+       }
++      if (*cur_elevator) {
++              /* We have a packet */
++              kfree_skb(skb);
++      }
++
++      *cur_elevator=0;
++
+ out:
+       rcu_read_unlock();
+       return ret;
+@@ -3780,6 +3810,7 @@
+ EXPORT_SYMBOL(net_enable_timestamp);
+ EXPORT_SYMBOL(net_disable_timestamp);
+ EXPORT_SYMBOL(dev_get_flags);
++EXPORT_PER_CPU_SYMBOL(sknid_elevator);
+ #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+ EXPORT_SYMBOL(br_handle_frame_hook);
+diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
+--- linux-2.6.22-510/net/packet/af_packet.c    2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/packet/af_packet.c    2008-06-07 18:30:41.000000000 -0400
+@@ -78,6 +78,7 @@
+ #include <linux/poll.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/vs_network.h>
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+@@ -246,10 +247,13 @@
+ static const struct proto_ops packet_ops_spkt;
++/*
++ * Per-CPU tag slot that the packet receive handlers in this file read and
++ * write.  This file only refers to the variable, so use the declaration
++ * macro rather than the definition macro.
++ */
++DECLARE_PER_CPU(int, sknid_elevator);
+ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
+ {
+       struct sock *sk;
+       struct sockaddr_pkt *spkt;
++      int tag = skb->skb_tag;
++      int *elevator=&__get_cpu_var(sknid_elevator);
+       /*
+        *      When we registered the protocol we saved the socket in the data
+@@ -269,6 +273,22 @@
+        *      so that this procedure is noop.
+        */
++      /* 
++       * (18:05:41) daniel_hozac: where?
++       * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
++       * (18:05:58) er: in packet_rcv_skpt
++       * (18:07:33) daniel_hozac: oh, that's evil. 
++       */
++
++      if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
++              *elevator=-2;
++              goto out;
++      }
++      else if (!sk->sk_nx_info && *elevator) {
++              /* Root has already seen this packet */
++              goto out;
++      }
++
+       if (skb->pkt_type == PACKET_LOOPBACK)
+               goto out;
+@@ -324,6 +344,9 @@
+       __be16 proto=0;
+       int err;
++      if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
++              return -EPERM;
++
+       /*
+        *      Get and verify the address.
+        */
+@@ -420,6 +443,17 @@
+                                     unsigned int res)
+ {
+       struct sk_filter *filter;
++      int tag = skb->skb_tag;
++      int *elevator=&__get_cpu_var(sknid_elevator);
++
++      if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
++              *elevator=-2;
++              return 0;
++      }
++      else if (!sk->sk_nx_info && *elevator) {
++              /* Root has already seen this packet */
++              return 0;
++      }
+       rcu_read_lock_bh();
+       filter = rcu_dereference(sk->sk_filter);
+@@ -711,6 +745,9 @@
+       unsigned char *addr;
+       int ifindex, err, reserve = 0;
++      if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
++              return -EPERM;
++
+       /*
+        *      Get and verify the address.
+        */
+@@ -984,8 +1021,9 @@
+       __be16 proto = (__force __be16)protocol; /* weird, but documented */
+       int err;
+-      if (!capable(CAP_NET_RAW))
++      if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
+               return -EPERM;
++              
+       if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
+           sock->type != SOCK_PACKET)
+               return -ESOCKTNOSUPPORT;