Patch500: linux-2.6-500-vserver-filesharing.patch
Patch510: linux-2.6-510-ipod.patch
Patch520: linux-2.6-520-vnet+.patch
+Patch521: linux-2.6-521-packet-tagging.patch
+Patch522: linux-2.6-522-iptables-connection-tagging.patch
+Patch523: linux-2.6-523-raw-sockets.patch
+Patch524: linux-2.6-524-peercred.patch
+Patch525: linux-2.6-525-sknid-elevator.patch
Patch530: linux-2.6-530-built-by-support.patch
Patch540: linux-2.6-540-oom-kill.patch
Patch550: linux-2.6-550-raise-default-nfile-ulimit.patch
%ApplyPatch 500
%ApplyPatch 510
-%ApplyPatch 520
+
+# VNET+ series
+%ApplyPatch 521
+%ApplyPatch 522
+%ApplyPatch 523
+%ApplyPatch 524
+%ApplyPatch 525
+
%ApplyPatch 530
%ApplyPatch 540
%ApplyPatch 550
--- /dev/null
+diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h
+--- linux-2.6.22-510/include/linux/skbuff.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/skbuff.h 2008-06-06 17:07:56.000000000 -0400
+@@ -302,6 +302,7 @@
+ #endif
+
+ __u32 mark;
++#define skb_tag mark
+
+ sk_buff_data_t transport_header;
+ sk_buff_data_t network_header;
+diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c
+--- linux-2.6.22-510/net/core/skbuff.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/core/skbuff.c 2008-06-06 17:07:56.000000000 -0400
+@@ -56,6 +56,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/init.h>
+ #include <linux/scatterlist.h>
++#include <linux/vs_network.h>
+
+ #include <net/protocol.h>
+ #include <net/dst.h>
+@@ -174,6 +175,7 @@
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
++ if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0;
+ /* make sure we initialize shinfo sequentially */
+ shinfo = skb_shinfo(skb);
+ atomic_set(&shinfo->dataref, 1);
+@@ -443,6 +445,8 @@
+ C(tail);
+ C(end);
+
++ /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */
++
+ atomic_inc(&(skb_shinfo(skb)->dataref));
+ skb->cloned = 1;
+
+@@ -492,6 +496,7 @@
+ new->tc_index = old->tc_index;
+ #endif
+ skb_copy_secmark(new, old);
++ new->skb_tag = old->skb_tag;
+ atomic_set(&new->users, 1);
+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
+--- linux-2.6.22-510/net/ipv4/af_inet.c 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-06-06 17:07:56.000000000 -0400
+@@ -178,6 +178,8 @@
+ return -EAGAIN;
+ }
+ inet->sport = htons(inet->num);
++ sk->sk_xid = vx_current_xid();
++ if (!in_interrupt()) sk->sk_nid = nx_current_nid(); else sk->sk_nid=0;
+ }
+ release_sock(sk);
+ return 0;
+diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c
+--- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c 2008-06-06 17:07:43.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c 2008-06-06 17:07:56.000000000 -0400
+@@ -49,6 +49,8 @@
+ else
+ logflags = NF_LOG_MASK;
+
++ printk("TAG=%d ", skb->skb_tag);
++
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ printk("TRUNCATED");
+
--- /dev/null
+diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h
+--- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h 2008-06-06 17:07:56.000000000 -0400
+@@ -11,6 +11,7 @@
+ XT_MARK_SET=0,
+ XT_MARK_AND,
+ XT_MARK_OR,
++ XT_MARK_COPYXID,
+ };
+
+ struct xt_mark_target_info_v1 {
+diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h
+--- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h 2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,14 @@
++#ifndef _XT_SETXID_H_target
++#define _XT_SETXID_H_target
++
++/* Version 1: modes accepted in xt_setxid_target_info_v1.mode */
++enum {
++ XT_SET_PACKET_XID=0
++};
++
++struct xt_setxid_target_info_v1 {
++ unsigned long mark; /* value the SETXID target writes into the packet's skb_tag */
++ u_int8_t mode; /* one of the XT_SET_* modes above */
++};
++
++#endif /*_XT_SETXID_H_target*/
+diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h
+--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h 2008-06-06 17:07:56.000000000 -0400
+@@ -12,6 +12,7 @@
+ #define IPT_MARK_SET XT_MARK_SET
+ #define IPT_MARK_AND XT_MARK_AND
+ #define IPT_MARK_OR XT_MARK_OR
++#define IPT_MARK_COPYXID XT_MARK_COPYXID
+
+ #define ipt_mark_target_info_v1 xt_mark_target_info_v1
+
+diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h
+--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h 2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,13 @@
++#ifndef _IPT_SETXID_H_target
++#define _IPT_SETXID_H_target
++
++/* Backwards compatibility for old userspace: ipt_-prefixed names map onto xt_SETXID.h */
++
++#include <linux/netfilter/xt_SETXID.h>
++
++/* Version 1: alias for the xt_ mode constant */
++#define IPT_SET_PACKET_XID XT_SET_PACKET_XID
++
++#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
++
++#endif /*_IPT_SETXID_H_target*/
+diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h
+--- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h 2008-06-06 17:07:56.000000000 -0400
+@@ -131,6 +131,9 @@
+ /* Storage reserved for other modules: */
+ union nf_conntrack_proto proto;
+
++ /* PLANETLAB. VNET-specific */
++ int xid[IP_CT_DIR_MAX];
++
+ /* features dynamically at the end: helper, nat (both optional) */
+ char data[0];
+ };
+diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig
+--- linux-2.6.22-510/net/netfilter/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/Kconfig 2008-06-06 17:07:56.000000000 -0400
+@@ -389,6 +389,13 @@
+
+ To compile it as a module, choose M here. If unsure, say N.
+
++config NETFILTER_XT_TARGET_SETXID
++ tristate '"SETXID" target support'
++ depends on NETFILTER_XTABLES
++ help
++	  This option adds a `SETXID' target, which allows you to set the
++	  xid tag carried by a packet (stored in the skb mark).
++
+ config NETFILTER_XT_MATCH_COMMENT
+ tristate '"comment" match support'
+ depends on NETFILTER_XTABLES
+diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile
+--- linux-2.6.22-510/net/netfilter/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/Makefile 2008-06-06 17:07:56.000000000 -0400
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
+ # targets
++obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c
+--- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c 2008-06-06 17:07:56.000000000 -0400
+@@ -726,6 +726,8 @@
+
+ /* Overload tuple linked list to put us in unconfirmed list. */
+ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
++ conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
++ conntrack->xid[IP_CT_DIR_REPLY] = -1;
+
+ write_unlock_bh(&nf_conntrack_lock);
+
+diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c
+--- linux-2.6.22-510/net/netfilter/xt_MARK.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/netfilter/xt_MARK.c 2008-06-07 17:55:26.000000000 -0400
+@@ -5,13 +5,18 @@
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
++ *
+ */
+
+ #include <linux/module.h>
++#include <linux/version.h>
+ #include <linux/skbuff.h>
+ #include <linux/ip.h>
+ #include <net/checksum.h>
++#include <net/route.h>
++#include <net/inet_hashtables.h>
+
++#include <net/netfilter/nf_conntrack.h>
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter/xt_MARK.h>
+
+@@ -21,6 +26,48 @@
+ MODULE_ALIAS("ipt_MARK");
+ MODULE_ALIAS("ip6t_MARK");
+
++static inline u_int16_t
++get_dst_port(struct nf_conntrack_tuple *tuple) /* destination-side port/ID for the tuple's protocol */
++{
++ switch (tuple->dst.protonum) {
++ case IPPROTO_GRE:
++ /* XXX GRE key: returned as stored on >= 2.6.11; older kernels truncate a 32-bit key to 16 bits */
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
++ return tuple->dst.u.gre.key;
++#else
++ return htons(ntohl(tuple->dst.u.gre.key));
++#endif
++ case IPPROTO_ICMP:
++ /* Bind on ICMP echo ID (read from the src half of the tuple, not dst) */
++ return tuple->src.u.icmp.id;
++ case IPPROTO_TCP:
++ return tuple->dst.u.tcp.port;
++ case IPPROTO_UDP:
++ return tuple->dst.u.udp.port;
++ default:
++ return tuple->dst.u.all; /* any other protocol: raw 'all' member of the union */
++ }
++}
++
++/* Source-side counterpart of get_dst_port(): per-protocol source port/ID. */
++static inline u_int16_t get_src_port(struct nf_conntrack_tuple *tuple)
++{
++	switch (tuple->dst.protonum) {
++	case IPPROTO_GRE:
++		/* Return the key as stored, as get_dst_port() does on >= 2.6.11 */
++		return tuple->src.u.gre.key;
++	case IPPROTO_ICMP:
++		/* Bind on ICMP echo ID */
++		return tuple->src.u.icmp.id;
++	case IPPROTO_TCP:
++		return tuple->src.u.tcp.port;
++	case IPPROTO_UDP:
++		return tuple->src.u.udp.port;
++	default:
++		return tuple->src.u.all;
++	}
++}
++
+ static unsigned int
+ target_v0(struct sk_buff **pskb,
+ const struct net_device *in,
+@@ -35,6 +82,8 @@
+ return XT_CONTINUE;
+ }
+
++DECLARE_PER_CPU(int, sknid_elevator); /* defined and exported in net/core/dev.c */
++
+ static unsigned int
+ target_v1(struct sk_buff **pskb,
+ const struct net_device *in,
+@@ -44,7 +93,20 @@
+ const void *targinfo)
+ {
+ const struct xt_mark_target_info_v1 *markinfo = targinfo;
+- int mark = 0;
++ enum ip_conntrack_info ctinfo;
++ struct sock *connection_sk;
++ int dif;
++ struct nf_conn *ct;
++ extern struct inet_hashinfo tcp_hashinfo;
++ enum ip_conntrack_dir dir;
++ int *curtag;
++ u_int32_t src_ip;
++ u_int32_t dst_ip;
++ u_int16_t proto, src_port;
++ u_int32_t ip;
++ u_int16_t port;
++
++ int mark = -1;
+
+ switch (markinfo->mode) {
+ case XT_MARK_SET:
+@@ -58,13 +120,74 @@
+ case XT_MARK_OR:
+ mark = (*pskb)->mark | markinfo->mark;
+ break;
++
++ case XT_MARK_COPYXID:
++
++ ct = nf_ct_get((*pskb), &ctinfo);
++ if (!ct)
++ break;
++
++ dir = CTINFO2DIR(ctinfo);
++ src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
++ dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++ src_port = get_src_port(&ct->tuplehash[dir].tuple);
++ proto = ct->tuplehash[dir].tuple.dst.protonum;
++
++ dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
++ ip = ct->tuplehash[dir].tuple.dst.u3.ip;
++ port = get_dst_port(&ct->tuplehash[dir].tuple);
++
++ if (proto == 1 || proto == 17) {
++ if ((*pskb)->mark>0) /* The packet is marked, it's going out */
++ {
++ //if (ct->xid[0]>0 && ct->xid[0]!=(*pskb)->mark)
++ /*printk(KERN_CRIT "xt_MARK log: %d/%d/%d/%d\n",ct->xid[0],(*pskb)->mark,hooknum==NF_IP_LOCAL_IN,proto);*/
++
++ ct->xid[0]=(*pskb)->mark;
+ }
+
++ if (ct->xid[0] > 0) {
++ mark = ct->xid[0];
++ }
++
++ }
++ else if (proto == 6) {
++ if ((*pskb)->sk) {
++ connection_sk = (*pskb)->sk;
++ sock_hold(connection_sk);
++ }
++ else
++ connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++
++
++ if (connection_sk) {
++ if (connection_sk->sk_state == TCP_TIME_WAIT) {
++ inet_twsk_put(inet_twsk(connection_sk));
++ break;
++ }
++ connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
++ ct->xid[!dir]=connection_sk->sk_nid;
++ if (connection_sk->sk_nid != 0)
++ mark = connection_sk->sk_nid;
++ sock_put(connection_sk);
++ }
++ else
++ mark = -1 ;
++ }
++ break;
++ }
++ if (mark != -1) {
+ (*pskb)->mark = mark;
++ }
++
++ curtag=&__get_cpu_var(sknid_elevator);
++ if (mark > 0 && *curtag==-2)
++ {
++ *curtag = mark;
++ }
+ return XT_CONTINUE;
+ }
+
+-
+ static int
+ checkentry_v0(const char *tablename,
+ const void *entry,
+@@ -92,7 +215,8 @@
+
+ if (markinfo->mode != XT_MARK_SET
+ && markinfo->mode != XT_MARK_AND
+- && markinfo->mode != XT_MARK_OR) {
++ && markinfo->mode != XT_MARK_OR
++ && markinfo->mode != XT_MARK_COPYXID) {
+ printk(KERN_WARNING "MARK: unknown mode %u\n",
+ markinfo->mode);
+ return 0;
+diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c
+--- linux-2.6.22-510/net/netfilter/xt_SETXID.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-520/net/netfilter/xt_SETXID.c 2008-06-06 17:07:56.000000000 -0400
+@@ -0,0 +1,79 @@
++#include <linux/module.h>
++#include <linux/skbuff.h>
++#include <linux/ip.h>
++#include <net/checksum.h>
++#include <linux/vs_network.h>
++
++#include <linux/netfilter/x_tables.h>
++#include <linux/netfilter/xt_SETXID.h>
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("");
++MODULE_DESCRIPTION("");
++MODULE_ALIAS("ipt_SETXID");
++
++static unsigned int
++target_v1(struct sk_buff **pskb,
++	  const struct net_device *in,
++	  const struct net_device *out,
++	  unsigned int hooknum,
++	  const struct xt_target *target,
++	  const void *targinfo)
++{
++	const struct xt_setxid_target_info_v1 *info = targinfo;
++
++	/* Only one mode exists: stamp the configured value onto the packet's
++	 * tag.  Any other mode leaves the packet untouched. */
++	if (info->mode == XT_SET_PACKET_XID)
++		(*pskb)->skb_tag = info->mark;
++
++	return XT_CONTINUE;
++}
++
++
++static int
++checkentry_v1(const char *tablename,
++	      const void *entry,
++	      const struct xt_target *target,
++	      void *targinfo,
++	      unsigned int hook_mask)
++{
++	struct xt_setxid_target_info_v1 *info = targinfo;
++
++	/* Accept (non-zero) only the single supported mode. */
++	if (info->mode == XT_SET_PACKET_XID)
++		return 1;
++
++	printk(KERN_WARNING "SETXID: unknown mode %u\n",
++	       info->mode);
++	return 0;
++}
++
++static struct xt_target xt_setxid_target[] = { /* registered by init(), unregistered by fini() */
++ {
++ .name = "SETXID",
++ .family = AF_INET,
++ .revision = 1, /* pairs with the _v1 info struct and handlers */
++ .checkentry = checkentry_v1,
++ .target = target_v1,
++ .targetsize = sizeof(struct xt_setxid_target_info_v1),
++ .table = "mangle", /* NOTE(review): presumably limits the target to the mangle table - confirm */
++ .me = THIS_MODULE,
++ }
++};
++
++/* Module entry point: register every target in xt_setxid_target. */
++static int __init init(void)
++{
++	/* Hand the registration status back to the caller unchanged. */
++	return xt_register_targets(xt_setxid_target,
++				   ARRAY_SIZE(xt_setxid_target));
++}
++
++static void __exit fini(void) /* module exit: unregister the targets init() registered */
++{
++ xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
++}
++
++module_init(init);
++module_exit(fini);
+
--- /dev/null
+diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h
+--- linux-2.6.22-510/include/linux/vserver/network.h 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/include/linux/vserver/network.h 2008-06-06 17:07:56.000000000 -0400
+@@ -47,6 +47,8 @@
+ #define NXC_TUN_CREATE 0x00000001
+
+ #define NXC_RAW_ICMP 0x00000100
++#define NXC_RAW_SOCKET 0x00000200
++#define NXC_RAW_SEND 0x00000400
+
+
+ /* address types */
+diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h
+--- linux-2.6.22-510/include/net/raw.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/net/raw.h 2008-06-06 17:07:56.000000000 -0400
+@@ -36,7 +36,7 @@
+
+ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr,
+- int dif);
++ int dif, int tag);
+
+ extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+
+diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c
+--- linux-2.6.22-510/net/core/sock.c 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/core/sock.c 2008-06-06 17:07:56.000000000 -0400
+@@ -444,6 +444,19 @@
+ }
+ goto set_sndbuf;
+
++ case SO_SETXID:
++ if (current_vx_info()) {
++ ret = -EPERM;
++ break;
++ }
++ if (val < 0 || val > MAX_S_CONTEXT) {
++ ret = -EINVAL;
++ break;
++ }
++ sk->sk_xid = val;
++ sk->sk_nid = val;
++ break;
++
+ case SO_RCVBUF:
+ /* Don't error on this BSD doesn't and if you think
+ about it this is right. Otherwise apps have to
+@@ -573,7 +586,7 @@
+ char devname[IFNAMSIZ];
+
+ /* Sorry... */
+- if (!capable(CAP_NET_RAW)) {
++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+ ret = -EPERM;
+ break;
+ }
+diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
+--- linux-2.6.22-510/net/ipv4/af_inet.c 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-06-06 17:07:56.000000000 -0400
+@@ -312,6 +314,9 @@
+ if ((protocol == IPPROTO_ICMP) &&
+ nx_capable(answer->capability, NXC_RAW_ICMP))
+ goto override;
++ if (sock->type == SOCK_RAW &&
++ nx_capable(answer->capability, NXC_RAW_SOCKET))
++ goto override;
+ if (answer->capability > 0 && !capable(answer->capability))
+ goto out_rcu_unlock;
+ override:
+diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c
+--- linux-2.6.22-510/net/ipv4/icmp.c 2008-06-06 17:07:55.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/icmp.c 2008-06-06 17:07:56.000000000 -0400
+@@ -709,7 +709,7 @@
+ if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
+ while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
+ iph->saddr,
+- skb->dev->ifindex)) != NULL) {
++ skb->dev->ifindex, skb->skb_tag)) != NULL) {
+ raw_err(raw_sk, skb, info);
+ raw_sk = sk_next(raw_sk);
+ iph = (struct iphdr *)skb->data;
+diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c
+--- linux-2.6.22-510/net/ipv4/ip_options.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/ip_options.c 2008-06-06 17:07:56.000000000 -0400
+@@ -409,7 +409,7 @@
+ optptr[2] += 8;
+ break;
+ default:
+- if (!skb && !capable(CAP_NET_RAW)) {
++ if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+ pp_ptr = optptr + 3;
+ goto error;
+ }
+@@ -445,7 +445,7 @@
+ opt->router_alert = optptr - iph;
+ break;
+ case IPOPT_CIPSO:
+- if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
++ if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) {
+ pp_ptr = optptr;
+ goto error;
+ }
+@@ -458,7 +458,7 @@
+ case IPOPT_SEC:
+ case IPOPT_SID:
+ default:
+- if (!skb && !capable(CAP_NET_RAW)) {
++ if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
+ pp_ptr = optptr;
+ goto error;
+ }
+diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c
+--- linux-2.6.22-510/net/ipv4/raw.c 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/ipv4/raw.c 2008-06-06 17:07:56.000000000 -0400
+@@ -103,7 +103,7 @@
+
+ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr,
+- int dif)
++ int dif, int tag)
+ {
+ struct hlist_node *node;
+
+@@ -112,6 +112,7 @@
+
+ if (inet->num == num &&
+ !(inet->daddr && inet->daddr != raddr) &&
++ (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) &&
+ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
+ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ goto found; /* gotcha */
+@@ -161,7 +162,7 @@
+ goto out;
+ sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
+ iph->saddr, iph->daddr,
+- skb->dev->ifindex);
++ skb->dev->ifindex, skb->skb_tag);
+
+ while (sk) {
+ delivered = 1;
+@@ -174,7 +175,7 @@
+ }
+ sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
+ iph->saddr, iph->daddr,
+- skb->dev->ifindex);
++ skb->dev->ifindex, skb->skb_tag);
+ }
+ out:
+ read_unlock(&raw_v4_lock);
+@@ -315,7 +316,7 @@
+ }
+
+ err = -EPERM;
+- if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
++ if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) &&
+ sk->sk_nx_info &&
+ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
+ goto error_free;
+
+
--- /dev/null
+diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h
+--- linux-2.6.22-510/include/linux/socket.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/include/linux/socket.h 2008-06-06 17:07:56.000000000 -0400
+@@ -288,6 +288,8 @@
+ #define SOL_TIPC 271
+ #define SOL_RXRPC 272
+
++#define SO_SETXID SO_PEERCRED
++
+ /* IPX options */
+ #define IPX_TYPE 1
+
+
+
--- /dev/null
+diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c
+--- linux-2.6.22-510/net/core/dev.c 2008-06-06 17:07:48.000000000 -0400
++++ linux-2.6.22-520/net/core/dev.c 2008-06-06 17:07:56.000000000 -0400
+@@ -1803,6 +1803,7 @@
+ * the ingress scheduler, you just cant add policies on ingress.
+ *
+ */
++
+ static int ing_filter(struct sk_buff *skb)
+ {
+ struct Qdisc *q;
+@@ -1832,13 +1833,20 @@
+ }
+ #endif
+
++/* The code already makes the assumption that packet handlers run
++ * sequentially on the same CPU. -Sapan */
++DEFINE_PER_CPU(int, sknid_elevator);
++
+ int netif_receive_skb(struct sk_buff *skb)
+ {
+ struct packet_type *ptype, *pt_prev;
+ struct net_device *orig_dev;
+ int ret = NET_RX_DROP;
++ int *cur_elevator=&__get_cpu_var(sknid_elevator);
+ __be16 type;
+
++ *cur_elevator = 0;
++
+ /* if we've gotten here through NAPI, check netpoll */
+ if (skb->dev->poll && netpoll_rx(skb))
+ return NET_RX_DROP;
+@@ -1873,8 +1881,9 @@
+
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if (!ptype->dev || ptype->dev == skb->dev) {
+- if (pt_prev)
++ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev, orig_dev);
++ }
+ pt_prev = ptype;
+ }
+ }
+@@ -1912,8 +1921,22 @@
+ }
+ }
+
++ /* We don't want the packet handlers to throw the packet away
++ * if we want the taps to treat it again - Sapan */
++ if (*cur_elevator) {
++ atomic_inc(&skb->users);
++ }
++
+ if (pt_prev) {
+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++ if (*cur_elevator > 0) {
++ skb->skb_tag = *cur_elevator;
++ list_for_each_entry_rcu(ptype, &ptype_all, list) {
++ if (!ptype->dev || ptype->dev == skb->dev) {
++ ret = deliver_skb(skb, ptype, orig_dev);
++ }
++ }
++ }
+ } else {
+ kfree_skb(skb);
+ /* Jamal, now you will not able to escape explaining
+@@ -1922,6 +1945,13 @@
+ ret = NET_RX_DROP;
+ }
+
++ if (*cur_elevator) {
++ /* We have a packet */
++ kfree_skb(skb);
++ }
++
++ *cur_elevator=0;
++
+ out:
+ rcu_read_unlock();
+ return ret;
+@@ -3780,6 +3810,7 @@
+ EXPORT_SYMBOL(net_enable_timestamp);
+ EXPORT_SYMBOL(net_disable_timestamp);
+ EXPORT_SYMBOL(dev_get_flags);
++EXPORT_PER_CPU_SYMBOL(sknid_elevator);
+
+ #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+ EXPORT_SYMBOL(br_handle_frame_hook);
+diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
+--- linux-2.6.22-510/net/packet/af_packet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-520/net/packet/af_packet.c 2008-06-07 18:30:41.000000000 -0400
+@@ -78,6 +78,7 @@
+ #include <linux/poll.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <linux/vs_network.h>
+
+ #ifdef CONFIG_INET
+ #include <net/inet_common.h>
+@@ -246,10 +247,13 @@
+
+ static const struct proto_ops packet_ops_spkt;
+
++DECLARE_PER_CPU(int, sknid_elevator); /* defined and exported in net/core/dev.c */
+ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+ {
+ struct sock *sk;
+ struct sockaddr_pkt *spkt;
++ int tag = skb->skb_tag;
++ int *elevator=&__get_cpu_var(sknid_elevator);
+
+ /*
+ * When we registered the protocol we saved the socket in the data
+@@ -269,6 +273,22 @@
+ * so that this procedure is noop.
+ */
+
++	/*
++	 * Per-context filtering of taps, done here in packet_rcv_spkt rather
++	 * than with the existing PF_PACKET socket filters (in an IRC exchange
++	 * daniel_hozac called this placement "evil"): a socket that belongs to
++	 * a network context only sees packets tagged 1 or tagged with its nid.
++	 */
++
++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
++ *elevator=-2;
++ goto out;
++ }
++ else if (!sk->sk_nx_info && *elevator) {
++ /* Root has already seen this packet */
++ goto out;
++ }
++
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ goto out;
+
+@@ -324,6 +344,9 @@
+ __be16 proto=0;
+ int err;
+
++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
++ return -EPERM;
++
+ /*
+ * Get and verify the address.
+ */
+@@ -420,6 +443,17 @@
+ unsigned int res)
+ {
+ struct sk_filter *filter;
++ int tag = skb->skb_tag;
++ int *elevator=&__get_cpu_var(sknid_elevator);
++
++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
++ *elevator=-2;
++ return 0;
++ }
++ else if (!sk->sk_nx_info && *elevator) {
++ /* Root has already seen this packet */
++ return 0;
++ }
+
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+@@ -711,6 +745,9 @@
+ unsigned char *addr;
+ int ifindex, err, reserve = 0;
+
++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
++ return -EPERM;
++
+ /*
+ * Get and verify the address.
+ */
+@@ -984,8 +1021,9 @@
+ __be16 proto = (__force __be16)protocol; /* weird, but documented */
+ int err;
+
+- if (!capable(CAP_NET_RAW))
++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
+ return -EPERM;
++
+ if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
+ sock->type != SOCK_PACKET)
+ return -ESOCKTNOSUPPORT;