From 15a27320917fe4100c0204359a6a9d18349fb31d Mon Sep 17 00:00:00 2001 From: Sapan Bhatia Date: Wed, 9 Jul 2008 16:36:33 +0000 Subject: [PATCH] * (Cleanup) Split up the VNET+ patch into its 5 component patches * Ran a consistency check on the split-up to make sure we didn't lose anything (to continue...) --- kernel-2.6.spec | 14 +- linux-2.6-521-packet-tagging.patch | 72 ++++ ...-2.6-522-iptables-connection-tagging.patch | 382 ++++++++++++++++++ linux-2.6-523-raw-sockets.patch | 160 ++++++++ linux-2.6-524-peercred.patch | 14 + linux-2.6-525-sknid-elevator.patch | 185 +++++++++ 6 files changed, 826 insertions(+), 1 deletion(-) create mode 100644 linux-2.6-521-packet-tagging.patch create mode 100644 linux-2.6-522-iptables-connection-tagging.patch create mode 100644 linux-2.6-523-raw-sockets.patch create mode 100644 linux-2.6-524-peercred.patch create mode 100644 linux-2.6-525-sknid-elevator.patch diff --git a/kernel-2.6.spec b/kernel-2.6.spec index 0cd576b1b..536dcf48a 100644 --- a/kernel-2.6.spec +++ b/kernel-2.6.spec @@ -163,6 +163,11 @@ Patch250: linux-2.6-250-ipsets.patch Patch500: linux-2.6-500-vserver-filesharing.patch Patch510: linux-2.6-510-ipod.patch Patch520: linux-2.6-520-vnet+.patch +Patch521: linux-2.6-521-packet-tagging.patch +Patch522: linux-2.6-522-iptables-connection-tagging.patch +Patch523: linux-2.6-523-raw-sockets.patch +Patch524: linux-2.6-524-peercred.patch +Patch525: linux-2.6-525-sknid-elevator.patch Patch530: linux-2.6-530-built-by-support.patch Patch540: linux-2.6-540-oom-kill.patch Patch550: linux-2.6-550-raise-default-nfile-ulimit.patch @@ -357,7 +362,14 @@ KERNEL_PREVIOUS=vanilla %ApplyPatch 500 %ApplyPatch 510 -%ApplyPatch 520 + +# VNET+ series +%ApplyPatch 521 +%ApplyPatch 522 +%ApplyPatch 523 +%ApplyPatch 524 +%ApplyPatch 525 + %ApplyPatch 530 %ApplyPatch 540 %ApplyPatch 550 diff --git a/linux-2.6-521-packet-tagging.patch b/linux-2.6-521-packet-tagging.patch new file mode 100644 index 000000000..5fcc1cffd --- /dev/null +++ 
b/linux-2.6-521-packet-tagging.patch @@ -0,0 +1,72 @@ +diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h +--- linux-2.6.22-510/include/linux/skbuff.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/include/linux/skbuff.h 2008-06-06 17:07:56.000000000 -0400 +@@ -302,6 +302,7 @@ + #endif + + __u32 mark; ++#define skb_tag mark + + sk_buff_data_t transport_header; + sk_buff_data_t network_header; +diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c +--- linux-2.6.22-510/net/core/skbuff.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/core/skbuff.c 2008-06-06 17:07:56.000000000 -0400 +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -174,6 +175,7 @@ + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; ++ if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); +@@ -443,6 +445,8 @@ + C(tail); + C(end); + ++ /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. 
*/ ++ + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; + +@@ -492,6 +496,7 @@ + new->tc_index = old->tc_index; + #endif + skb_copy_secmark(new, old); ++ new->skb_tag = old->skb_tag; /* a copied header keeps the tag of the buffer it was copied from */ + atomic_set(&new->users, 1); + skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; + skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; +diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c +--- linux-2.6.22-510/net/ipv4/af_inet.c 2008-06-06 17:07:48.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-06-06 17:07:56.000000000 -0400 +@@ -178,6 +178,8 @@ + return -EAGAIN; + } + inet->sport = htons(inet->num); ++ sk->sk_xid = vx_current_xid(); ++ if (!in_interrupt()) sk->sk_nid = nx_current_nid(); else sk->sk_nid=0; /* interrupt context: record nid 0 instead of asking for the current one */ + } + release_sock(sk); + return 0; +diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c +--- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c 2008-06-06 17:07:43.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c 2008-06-06 17:07:56.000000000 -0400 +@@ -49,6 +49,8 @@ + else + logflags = NF_LOG_MASK; + ++ printk("TAG=%u ", skb->skb_tag); /* skb_tag is an alias for the __u32 mark field, so it is printed as unsigned */ ++ + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { + printk("TRUNCATED"); + diff --git a/linux-2.6-522-iptables-connection-tagging.patch b/linux-2.6-522-iptables-connection-tagging.patch new file mode 100644 index 000000000..a3f28a03d --- /dev/null +++ b/linux-2.6-522-iptables-connection-tagging.patch @@ -0,0 +1,382 @@ +diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h +--- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h 2008-06-06 17:07:56.000000000 -0400 +@@ -11,6 +11,7 @@ + XT_MARK_SET=0, + XT_MARK_AND, + XT_MARK_OR, ++ XT_MARK_COPYXID, + }; + + struct xt_mark_target_info_v1 { +diff -Nurb 
linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h +--- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h 2008-06-06 17:07:56.000000000 -0400 +@@ -0,0 +1,14 @@ ++#ifndef _XT_SETXID_H_target ++#define _XT_SETXID_H_target ++ ++/* Version 1 */ ++enum { ++ XT_SET_PACKET_XID=0 ++}; ++ ++struct xt_setxid_target_info_v1 { ++ unsigned long mark; ++ u_int8_t mode; ++}; ++ ++#endif /*_XT_SETXID_H_target*/ +diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h +--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h 2008-06-06 17:07:56.000000000 -0400 +@@ -12,6 +12,7 @@ + #define IPT_MARK_SET XT_MARK_SET + #define IPT_MARK_AND XT_MARK_AND + #define IPT_MARK_OR XT_MARK_OR ++#define IPT_MARK_COPYXID XT_MARK_COPYXID + + #define ipt_mark_target_info_v1 xt_mark_target_info_v1 + +diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h +--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h 2008-06-06 17:07:56.000000000 -0400 +@@ -0,0 +1,13 @@ ++#ifndef _IPT_SETXID_H_target ++#define _IPT_SETXID_H_target ++ ++/* Backwards compatibility for old userspace */ ++ ++#include ++ ++/* Version 1 */ ++#define IPT_SET_PACKET_XID XT_SET_PACKET_XID ++ ++#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1 ++ ++#endif /*_IPT_SETXID_H_target*/ +diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h +--- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-520/include/net/netfilter/nf_conntrack.h 2008-06-06 17:07:56.000000000 -0400 +@@ -131,6 +131,9 @@ + /* Storage reserved for other modules: */ + union nf_conntrack_proto proto; + ++ /* PLANETLAB. VNET-specific */ ++ int xid[IP_CT_DIR_MAX]; ++ + /* features dynamically at the end: helper, nat (both optional) */ + char data[0]; + }; +diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig +--- linux-2.6.22-510/net/netfilter/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/netfilter/Kconfig 2008-06-06 17:07:56.000000000 -0400 +@@ -389,6 +389,13 @@ + + To compile it as a module, choose M here. If unsure, say N. + ++config NETFILTER_XT_TARGET_SETXID ++ tristate '"SETXID" target support' ++ depends on NETFILTER_XTABLES ++ help ++ This option adds a `SETXID' target, which allows you to alter the ++ xid of a socket. ++ + config NETFILTER_XT_MATCH_COMMENT + tristate '"comment" match support' + depends on NETFILTER_XTABLES +diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile +--- linux-2.6.22-510/net/netfilter/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/netfilter/Makefile 2008-06-06 17:07:56.000000000 -0400 +@@ -37,6 +37,7 @@ + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o + + # targets ++obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o + obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o + obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o +diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c +--- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c 2008-06-06 17:07:56.000000000 -0400 +@@ -726,6 +726,8 @@ + + /* Overload tuple linked list to put us in unconfirmed list. 
*/ + list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); ++ conntrack->xid[IP_CT_DIR_ORIGINAL] = -1; /* presumably "no xid recorded yet"; the COPYXID code only trusts values > 0 */ ++ conntrack->xid[IP_CT_DIR_REPLY] = -1; + + write_unlock_bh(&nf_conntrack_lock); + +diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c +--- linux-2.6.22-510/net/netfilter/xt_MARK.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/netfilter/xt_MARK.c 2008-06-07 17:55:26.000000000 -0400 +@@ -5,13 +5,18 @@ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. ++ * + */ + + #include ++#include + #include + #include + #include ++#include ++#include + ++#include + #include + #include + +@@ -21,6 +26,48 @@ + MODULE_ALIAS("ipt_MARK"); + MODULE_ALIAS("ip6t_MARK"); + ++static inline u_int16_t ++get_dst_port(struct nf_conntrack_tuple *tuple) /* 16-bit destination-side key of the tuple, chosen by protocol */ ++{ ++ switch (tuple->dst.protonum) { ++ case IPPROTO_GRE: ++ /* XXX Truncate 32-bit GRE key to 16 bits */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) ++ return tuple->dst.u.gre.key; ++#else ++ return htons(ntohl(tuple->dst.u.gre.key)); ++#endif ++ case IPPROTO_ICMP: ++ /* Bind on ICMP echo ID */ ++ return tuple->src.u.icmp.id; /* the echo id is read from the src half in both helpers */ ++ case IPPROTO_TCP: ++ return tuple->dst.u.tcp.port; ++ case IPPROTO_UDP: ++ return tuple->dst.u.udp.port; ++ default: ++ return tuple->dst.u.all; ++ } ++} ++ ++static inline u_int16_t ++get_src_port(struct nf_conntrack_tuple *tuple) /* 16-bit source-side key of the tuple, chosen by protocol */ ++{ ++ switch (tuple->dst.protonum) { ++ case IPPROTO_GRE: ++ /* Same handling as get_dst_port() on >= 2.6.11 kernels: the key is returned unchanged */ ++ return tuple->src.u.gre.key; ++ case IPPROTO_ICMP: ++ /* Bind on ICMP echo ID */ ++ return tuple->src.u.icmp.id; ++ case IPPROTO_TCP: ++ return tuple->src.u.tcp.port; ++ case IPPROTO_UDP: ++ return tuple->src.u.udp.port; ++ default: ++ return tuple->src.u.all; ++ } ++} ++ + static unsigned int + target_v0(struct sk_buff **pskb, + const struct 
net_device *in, +@@ -35,6 +82,8 @@ + return XT_CONTINUE; + } + ++DECLARE_PER_CPU(int, sknid_elevator); /* per-CPU slot defined in net/core/dev.c; declared here, not re-defined */ ++ + static unsigned int + target_v1(struct sk_buff **pskb, + const struct net_device *in, +@@ -44,7 +93,20 @@ + const void *targinfo) + { + const struct xt_mark_target_info_v1 *markinfo = targinfo; +- int mark = 0; ++ enum ip_conntrack_info ctinfo; ++ struct sock *connection_sk; ++ int dif; ++ struct nf_conn *ct; ++ extern struct inet_hashinfo tcp_hashinfo; ++ enum ip_conntrack_dir dir; ++ int *curtag; ++ u_int32_t src_ip; ++ u_int32_t dst_ip; ++ u_int16_t proto, src_port; ++ u_int32_t ip; ++ u_int16_t port; ++ ++ int mark = -1; /* -1 means "leave the skb mark untouched" (checked before the final assignment) */ + + switch (markinfo->mode) { + case XT_MARK_SET: +@@ -58,13 +120,74 @@ + case XT_MARK_OR: + mark = (*pskb)->mark | markinfo->mark; + break; ++ ++ case XT_MARK_COPYXID: ++ ++ ct = nf_ct_get((*pskb), &ctinfo); ++ if (!ct) ++ break; ++ ++ dir = CTINFO2DIR(ctinfo); ++ src_ip = ct->tuplehash[dir].tuple.src.u3.ip; ++ dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip; ++ src_port = get_src_port(&ct->tuplehash[dir].tuple); ++ proto = ct->tuplehash[dir].tuple.dst.protonum; ++ ++ dif = ((struct rtable *)(*pskb)->dst)->rt_iif; /* NOTE(review): dereferences (*pskb)->dst unconditionally -- confirm this mode is never reached on a hook where no route is attached yet */ ++ ip = ct->tuplehash[dir].tuple.dst.u3.ip; ++ port = get_dst_port(&ct->tuplehash[dir].tuple); ++ ++ if (proto == IPPROTO_ICMP || proto == IPPROTO_UDP) { ++ if ((*pskb)->mark>0) /* The packet is marked, it's going out */ ++ { ++ //if (ct->xid[0]>0 && ct->xid[0]!=(*pskb)->mark) ++ /*printk(KERN_CRIT "xt_MARK log: %d/%d/%d/%d\n",ct->xid[0],(*pskb)->mark,hooknum==NF_IP_LOCAL_IN,proto);*/ ++ ++ ct->xid[0]=(*pskb)->mark; + } + ++ if (ct->xid[0] > 0) { ++ mark = ct->xid[0]; ++ } ++ ++ } ++ else if (proto == IPPROTO_TCP) { ++ if ((*pskb)->sk) { ++ connection_sk = (*pskb)->sk; ++ sock_hold(connection_sk); ++ } ++ else ++ connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif); ++ ++ ++ if (connection_sk) { ++ if (connection_sk->sk_state == TCP_TIME_WAIT) { ++ inet_twsk_put(inet_twsk(connection_sk)); ++ break; ++ } ++ 
connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir]; ++ ct->xid[!dir]=connection_sk->sk_nid; ++ if (connection_sk->sk_nid != 0) ++ mark = connection_sk->sk_nid; ++ sock_put(connection_sk); ++ } ++ else ++ mark = -1 ; /* no socket found: leave the skb mark untouched */ ++ } ++ break; ++ } ++ if (mark != -1) { + (*pskb)->mark = mark; ++ } ++ ++ curtag=&__get_cpu_var(sknid_elevator); ++ if (mark > 0 && *curtag==-2) ++ { ++ *curtag = mark; /* replace the -2 request left in the per-CPU slot with the resolved tag */ ++ } + return XT_CONTINUE; + } + +- + static int + checkentry_v0(const char *tablename, + const void *entry, +@@ -92,7 +215,8 @@ + + if (markinfo->mode != XT_MARK_SET + && markinfo->mode != XT_MARK_AND +- && markinfo->mode != XT_MARK_OR) { ++ && markinfo->mode != XT_MARK_OR ++ && markinfo->mode != XT_MARK_COPYXID) { + printk(KERN_WARNING "MARK: unknown mode %u\n", + markinfo->mode); + return 0; +diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c +--- linux-2.6.22-510/net/netfilter/xt_SETXID.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-520/net/netfilter/xt_SETXID.c 2008-06-06 17:07:56.000000000 -0400 +@@ -0,0 +1,79 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR(""); ++MODULE_DESCRIPTION(""); ++MODULE_ALIAS("ipt_SETXID"); ++ ++static unsigned int ++target_v1(struct sk_buff **pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ unsigned int hooknum, ++ const struct xt_target *target, ++ const void *targinfo) ++{ ++ const struct xt_setxid_target_info_v1 *setxidinfo = targinfo; ++ ++ switch (setxidinfo->mode) { ++ case XT_SET_PACKET_XID: ++ (*pskb)->skb_tag = setxidinfo->mark; /* overwrite the packet tag with the value supplied by the rule */ ++ break; ++ } ++ return XT_CONTINUE; ++} ++ ++ ++static int ++checkentry_v1(const char *tablename, ++ const void *entry, ++ const struct xt_target *target, ++ void *targinfo, ++ unsigned int hook_mask) ++{ ++ struct xt_setxid_target_info_v1 *setxidinfo = targinfo; ++ ++ if (setxidinfo->mode != XT_SET_PACKET_XID) { ++ 
printk(KERN_WARNING "SETXID: unknown mode %u\n", ++ setxidinfo->mode); ++ return 0; /* 0 = reject the rule, 1 = accept it */ ++ } ++ ++ return 1; ++} ++ ++static struct xt_target xt_setxid_target[] = { ++ { ++ .name = "SETXID", ++ .family = AF_INET, ++ .revision = 1, ++ .checkentry = checkentry_v1, ++ .target = target_v1, ++ .targetsize = sizeof(struct xt_setxid_target_info_v1), ++ .table = "mangle", ++ .me = THIS_MODULE, ++ } ++}; ++ ++static int __init init(void) ++{ ++ int err; ++ ++ err = xt_register_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target)); ++ return err; ++} ++ ++static void __exit fini(void) ++{ ++ xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target)); ++} ++ ++module_init(init); ++module_exit(fini); + diff --git a/linux-2.6-523-raw-sockets.patch b/linux-2.6-523-raw-sockets.patch new file mode 100644 index 000000000..24b05bdbc --- /dev/null +++ b/linux-2.6-523-raw-sockets.patch @@ -0,0 +1,160 @@ +diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h +--- linux-2.6.22-510/include/linux/vserver/network.h 2008-06-06 17:07:48.000000000 -0400 ++++ linux-2.6.22-520/include/linux/vserver/network.h 2008-06-06 17:07:56.000000000 -0400 +@@ -47,6 +47,8 @@ + #define NXC_TUN_CREATE 0x00000001 + + #define NXC_RAW_ICMP 0x00000100 ++#define NXC_RAW_SOCKET 0x00000200 ++#define NXC_RAW_SEND 0x00000400 + + + /* address types */ +diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h +--- linux-2.6.22-510/include/net/raw.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/include/net/raw.h 2008-06-06 17:07:56.000000000 -0400 +@@ -36,7 +36,7 @@ + + extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, +- int dif); ++ int dif, int tag); + + extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); + +diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c +--- linux-2.6.22-510/net/core/sock.c 2008-06-06 
17:07:48.000000000 -0400 ++++ linux-2.6.22-520/net/core/sock.c 2008-06-06 17:07:56.000000000 -0400 +@@ -444,6 +444,19 @@ + } + goto set_sndbuf; + ++ case SO_SETXID: ++ if (current_vx_info()) { ++ ret = -EPERM; ++ break; ++ } ++ if (val < 0 || val > MAX_S_CONTEXT) { ++ ret = -EINVAL; ++ break; ++ } ++ sk->sk_xid = val; ++ sk->sk_nid = val; ++ break; ++ + case SO_RCVBUF: + /* Don't error on this BSD doesn't and if you think + about it this is right. Otherwise apps have to +@@ -573,7 +586,7 @@ + char devname[IFNAMSIZ]; + + /* Sorry... */ +- if (!capable(CAP_NET_RAW)) { ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) { + ret = -EPERM; + break; + } +diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c +--- linux-2.6.22-510/net/ipv4/af_inet.c 2008-06-06 17:07:48.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-06-06 17:07:56.000000000 -0400 +@@ -312,6 +314,9 @@ + if ((protocol == IPPROTO_ICMP) && + nx_capable(answer->capability, NXC_RAW_ICMP)) + goto override; ++ if (sock->type == SOCK_RAW && ++ nx_capable(answer->capability, NXC_RAW_SOCKET)) ++ goto override; + if (answer->capability > 0 && !capable(answer->capability)) + goto out_rcu_unlock; + override: +diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c +--- linux-2.6.22-510/net/ipv4/icmp.c 2008-06-06 17:07:55.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/icmp.c 2008-06-06 17:07:56.000000000 -0400 +@@ -709,7 +709,7 @@ + if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { + while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, + iph->saddr, +- skb->dev->ifindex)) != NULL) { ++ skb->dev->ifindex, skb->skb_tag)) != NULL) { + raw_err(raw_sk, skb, info); + raw_sk = sk_next(raw_sk); + iph = (struct iphdr *)skb->data; +diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c +--- linux-2.6.22-510/net/ipv4/ip_options.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/ip_options.c 2008-06-06 
17:07:56.000000000 -0400 +@@ -409,7 +409,7 @@ + optptr[2] += 8; + break; + default: +- if (!skb && !capable(CAP_NET_RAW)) { ++ if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) { + pp_ptr = optptr + 3; + goto error; + } +@@ -445,7 +445,7 @@ + opt->router_alert = optptr - iph; + break; + case IPOPT_CIPSO: +- if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) { ++ if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) { + pp_ptr = optptr; + goto error; + } +@@ -458,7 +458,7 @@ + case IPOPT_SEC: + case IPOPT_SID: + default: +- if (!skb && !capable(CAP_NET_RAW)) { ++ if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) { + pp_ptr = optptr; + goto error; + } +diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c +--- linux-2.6.22-510/net/ipv4/raw.c 2008-06-06 17:07:48.000000000 -0400 ++++ linux-2.6.22-520/net/ipv4/raw.c 2008-06-06 17:07:56.000000000 -0400 +@@ -103,7 +103,7 @@ + + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, +- int dif) ++ int dif, int tag) + { + struct hlist_node *node; + +@@ -112,6 +112,7 @@ + + if (inet->num == num && + !(inet->daddr && inet->daddr != raddr) && ++ (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) && + v4_sock_addr_match(sk->sk_nx_info, inet, laddr) && + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + goto found; /* gotcha */ +@@ -161,7 +162,7 @@ + goto out; + sk = __raw_v4_lookup(__sk_head(head), iph->protocol, + iph->saddr, iph->daddr, +- skb->dev->ifindex); ++ skb->dev->ifindex, skb->skb_tag); + + while (sk) { + delivered = 1; +@@ -174,7 +175,7 @@ + } + sk = __raw_v4_lookup(sk_next(sk), iph->protocol, + iph->saddr, iph->daddr, +- skb->dev->ifindex); ++ skb->dev->ifindex, skb->skb_tag); + } + out: + read_unlock(&raw_v4_lock); +@@ -315,7 +316,7 @@ + } + + err = -EPERM; +- if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) && ++ if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) && + sk->sk_nx_info && + 
!v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND)) + goto error_free; + + diff --git a/linux-2.6-524-peercred.patch b/linux-2.6-524-peercred.patch new file mode 100644 index 000000000..3cc87e8df --- /dev/null +++ b/linux-2.6-524-peercred.patch @@ -0,0 +1,14 @@ +diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h +--- linux-2.6.22-510/include/linux/socket.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/include/linux/socket.h 2008-06-06 17:07:56.000000000 -0400 +@@ -288,6 +288,8 @@ + #define SOL_TIPC 271 + #define SOL_RXRPC 272 + ++#define SO_SETXID SO_PEERCRED ++ + /* IPX options */ + #define IPX_TYPE 1 + + + diff --git a/linux-2.6-525-sknid-elevator.patch b/linux-2.6-525-sknid-elevator.patch new file mode 100644 index 000000000..0ff4d8aee --- /dev/null +++ b/linux-2.6-525-sknid-elevator.patch @@ -0,0 +1,185 @@ +diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c +--- linux-2.6.22-510/net/core/dev.c 2008-06-06 17:07:48.000000000 -0400 ++++ linux-2.6.22-520/net/core/dev.c 2008-06-06 17:07:56.000000000 -0400 +@@ -1803,6 +1803,7 @@ + * the ingress scheduler, you just cant add policies on ingress. + * + */ ++ + static int ing_filter(struct sk_buff *skb) + { + struct Qdisc *q; +@@ -1832,13 +1833,20 @@ + } + #endif + ++/* The code already makes the assumption that packet handlers run ++ * sequentially on the same CPU. 
-Sapan */ ++DEFINE_PER_CPU(int, sknid_elevator); /* per-CPU slot: 0 = idle, -2 = written by a packet tap that refused the skb, > 0 = tag written back by xt_MARK */ ++ + int netif_receive_skb(struct sk_buff *skb) + { + struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; + int ret = NET_RX_DROP; ++ int *cur_elevator=&__get_cpu_var(sknid_elevator); + __be16 type; + ++ *cur_elevator = 0; /* every packet starts with an empty slot */ ++ + /* if we've gotten here through NAPI, check netpoll */ + if (skb->dev->poll && netpoll_rx(skb)) + return NET_RX_DROP; +@@ -1873,8 +1881,9 @@ + + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) { +- if (pt_prev) ++ if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); ++ } + pt_prev = ptype; + } + } +@@ -1912,8 +1921,22 @@ + } + } + ++ /* We don't want the packet handlers to throw the packet away ++ * if we want the taps to treat it again - Sapan */ ++ if (*cur_elevator) { ++ atomic_inc(&skb->users); /* extra reference so the skb survives the final handler */ ++ } ++ + if (pt_prev) { + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); ++ if (*cur_elevator > 0) { ++ skb->skb_tag = *cur_elevator; /* retag with the resolved value, then offer the skb to every ptype_all tap again */ ++ list_for_each_entry_rcu(ptype, &ptype_all, list) { ++ if (!ptype->dev || ptype->dev == skb->dev) { ++ ret = deliver_skb(skb, ptype, orig_dev); ++ } ++ } ++ } + } else { + kfree_skb(skb); + /* Jamal, now you will not able to escape explaining +@@ -1922,6 +1945,13 @@ + ret = NET_RX_DROP; + } + ++ if (*cur_elevator) { ++ /* We have a packet */ ++ kfree_skb(skb); /* NOTE(review): balances the atomic_inc() above only if the slot was already non-zero before the final handler ran -- confirm a handler cannot flip it from 0 in between */ ++ } ++ ++ *cur_elevator=0; ++ + out: + rcu_read_unlock(); + return ret; +@@ -3780,6 +3810,7 @@ + EXPORT_SYMBOL(net_enable_timestamp); + EXPORT_SYMBOL(net_disable_timestamp); + EXPORT_SYMBOL(dev_get_flags); ++EXPORT_PER_CPU_SYMBOL(sknid_elevator); + + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) + EXPORT_SYMBOL(br_handle_frame_hook); +diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c +--- linux-2.6.22-510/net/packet/af_packet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-520/net/packet/af_packet.c 2008-06-07 18:30:41.000000000 -0400 +@@ -78,6 +78,7 @@ + #include + 
#include + #include ++#include + + #ifdef CONFIG_INET + #include +@@ -246,10 +247,13 @@ + + static const struct proto_ops packet_ops_spkt; + ++DECLARE_PER_CPU(int, sknid_elevator); /* per-CPU slot defined in net/core/dev.c; declared here, not re-defined */ + static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) + { + struct sock *sk; + struct sockaddr_pkt *spkt; ++ int tag = skb->skb_tag; ++ int *elevator=&__get_cpu_var(sknid_elevator); + + /* + * When we registered the protocol we saved the socket in the data +@@ -269,6 +273,22 @@ + * so that this procedure is noop. + */ + ++ /* ++ * (18:05:41) daniel_hozac: where? ++ * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we? ++ * (18:05:58) er: in packet_rcv_skpt ++ * (18:07:33) daniel_hozac: oh, that's evil. ++ */ ++ ++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) { ++ *elevator=-2; /* -2: a context-bound socket skipped this skb; xt_MARK swaps the -2 for the resolved tag */ ++ goto out; ++ } ++ else if (!sk->sk_nx_info && *elevator) { ++ /* Root has already seen this packet */ ++ goto out; ++ } ++ + if (skb->pkt_type == PACKET_LOOPBACK) + goto out; + +@@ -324,6 +344,9 @@ + __be16 proto=0; + int err; + ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) ++ return -EPERM; ++ + /* + * Get and verify the address. + */ +@@ -420,6 +443,17 @@ + unsigned int res) + { + struct sk_filter *filter; ++ int tag = skb->skb_tag; ++ int *elevator=&__get_cpu_var(sknid_elevator); ++ ++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) { ++ *elevator=-2; /* same tag filter as packet_rcv_spkt() */ ++ return 0; ++ } ++ else if (!sk->sk_nx_info && *elevator) { ++ /* Root has already seen this packet */ ++ return 0; ++ } + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); +@@ -711,6 +745,9 @@ + unsigned char *addr; + int ifindex, err, reserve = 0; + ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) ++ return -EPERM; ++ + /* + * Get and verify the address. 
+ */ +@@ -984,8 +1021,9 @@ + __be16 proto = (__force __be16)protocol; /* weird, but documented */ + int err; + +- if (!capable(CAP_NET_RAW)) ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) + return -EPERM; ++ + if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && + sock->type != SOCK_PACKET) + return -ESOCKTNOSUPPORT; -- 2.47.0