-diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
---- linux-2.6.22-524/net/core/dev.c 2008-07-15 11:39:32.000000000 -0400
-+++ linux-2.6.22-525/net/core/dev.c 2008-07-21 16:22:33.000000000 -0400
-@@ -1131,7 +1131,7 @@
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/include/linux/netdevice.h linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/include/linux/netdevice.h
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/include/linux/netdevice.h 2008-10-13 14:52:09.000000000 +0200
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/include/linux/netdevice.h 2009-01-21 03:38:41.000000000 +0100
+@@ -857,6 +857,7 @@ static inline void netif_napi_del(struct
+ struct packet_type {
+ __be16 type; /* This is really htons(ether_type). */
+ struct net_device *dev; /* NULL is wildcarded here */
++ unsigned char sknid_elevator;
+ int (*func) (struct sk_buff *,
+ struct net_device *,
+ struct packet_type *,
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/core/dev.c linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/core/dev.c
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/core/dev.c 2008-12-19 12:09:14.000000000 +0100
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/core/dev.c 2009-01-21 03:43:19.000000000 +0100
+@@ -99,6 +99,8 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/stat.h>
++#include <linux/ip.h>
++#include <linux/tcp.h>
+ #include <linux/if_bridge.h>
+ #include <linux/if_macvlan.h>
+ #include <net/dst.h>
+@@ -1318,7 +1320,7 @@ static void dev_queue_xmit_nit(struct sk
if ((ptype->dev == dev || !ptype->dev) &&
(ptype->af_packet_priv == NULL ||
(struct sock *)ptype->af_packet_priv != skb->sk)) {
if (!skb2)
break;
-@@ -1803,6 +1803,7 @@
- * the ingress scheduler, you just cant add policies on ingress.
- *
- */
-+
- static int ing_filter(struct sk_buff *skb)
- {
- struct Qdisc *q;
-@@ -1832,13 +1833,21 @@
+@@ -2170,6 +2172,10 @@ void netif_nit_deliver(struct sk_buff *s
+ rcu_read_unlock();
}
- #endif
+/* The code already makes the assumption that packet handlers run
+ * sequentially on the same CPU. -Sapan */
-+DEFINE_PER_CPU(int, sknid_elevator);
++DEFINE_PER_CPU(int, sknid_elevator) = 0;
+
- int netif_receive_skb(struct sk_buff *skb)
- {
- struct packet_type *ptype, *pt_prev;
+ /**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+@@ -2191,8 +2197,11 @@ int netif_receive_skb(struct sk_buff *sk
struct net_device *orig_dev;
+ struct net_device *null_or_orig;
int ret = NET_RX_DROP;
-+ int *cur_elevator=&__get_cpu_var(sknid_elevator);
-+ struct sk_buff *skb2;
++ int *cur_elevator = &__get_cpu_var(sknid_elevator);
__be16 type;
+ *cur_elevator = 0;
+
/* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
+ if (netpoll_receive_skb(skb))
return NET_RX_DROP;
-@@ -1873,8 +1882,9 @@
-
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev) {
-- if (pt_prev)
-+ if (pt_prev) {
- ret = deliver_skb(skb, pt_prev, orig_dev);
-+ }
- pt_prev = ptype;
- }
+@@ -2269,7 +2278,27 @@ ncls:
}
-@@ -1902,6 +1912,14 @@
- if (!skb)
- goto out;
-
-+ /* We don't want the packet handlers to throw the packet away
-+ * if we want the taps to treat it again - Sapan */
-+ if (*cur_elevator) {
-+ skb2 = skb_copy(skb,GFP_ATOMIC);
-+ if (!skb2) *cur_elevator=0;
-+ }
-+
-+
- type = skb->protocol;
- list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
- if (ptype->type == type &&
-@@ -1914,6 +1932,7 @@
if (pt_prev) {
- ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++ /* Latch now: the handler may change *cur_elevator, and the
++ * extra reference taken here must be dropped exactly once. */
++ int held = (*cur_elevator != 0);
++ if (held)
++ atomic_inc(&skb->users);
+
- } else {
- kfree_skb(skb);
- /* Jamal, now you will not able to escape explaining
-@@ -1922,6 +1941,21 @@
- ret = NET_RX_DROP;
- }
-
-+ if (*cur_elevator>0) {
-+ skb2->skb_tag = *cur_elevator;
-+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
-+ if (!ptype->dev || ptype->dev == skb2->dev) {
-+ ret = deliver_skb(skb2, ptype, orig_dev);
+ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++
++ if (held && (*cur_elevator)>0) {
++ skb->skb_tag = *cur_elevator;
++ list_for_each_entry_rcu(ptype, &ptype_all, list) {
++ if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) {
++ ret = deliver_skb(skb, ptype, orig_dev);
++ }
+ }
+ }
-+ }
-+ if (*cur_elevator) {
-+ /* We have a packet */
-+ kfree_skb(skb2);
-+ }
-+
-+ *cur_elevator=0;
+
- out:
- rcu_read_unlock();
- return ret;
-@@ -3780,6 +3814,7 @@
++ if (held) {
++ /* Drop the reference taken before the handler ran */
++ kfree_skb(skb);
++ }
+ } else {
+ kfree_skb(skb);
+ /* Jamal, now you will not able to escape explaining
+@@ -4892,6 +4921,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
-diff -Nurb linux-2.6.22-524/net/packet/af_packet.c linux-2.6.22-525/net/packet/af_packet.c
---- linux-2.6.22-524/net/packet/af_packet.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-525/net/packet/af_packet.c 2008-07-15 11:40:11.000000000 -0400
-@@ -78,6 +78,7 @@
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/packet/af_packet.c linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/packet/af_packet.c
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/packet/af_packet.c 2008-10-13 14:52:09.000000000 +0200
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/packet/af_packet.c 2009-01-21 03:38:41.000000000 +0100
+@@ -77,6 +77,7 @@
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
-@@ -246,10 +247,13 @@
+@@ -276,10 +277,53 @@ static const struct proto_ops packet_ops
static const struct proto_ops packet_ops_spkt;
+DECLARE_PER_CPU(int, sknid_elevator);
++
++static inline unsigned int slice_check_and_elevate(struct sk_buff *skb, struct sock *sk) {
++ /* Decide whether the socket sk may see skb; returns 1 to deliver the
++ * packet and 0 to reject it.
++ *
++ * This mechanism is quite involved, and caused us a lot of pain
++ * including crashes and packet loss during the 4.2 rollout. The first
++ * time this function is invoked for a packet it does not have enough
++ * information to make the call, since xt_MARK has not had a chance to
++ * tag the packet with the slice id. There is also no way of passing
++ * state between xt_MARK and this function through the packet itself,
++ * because the skb gets cloned quite a few times between these two
++ * points. I'd rather not use skb_shared_info because it's treated as
++ * a blob of memory, and so it would be quite hard to maintain.
++ *
++ * Instead, a per-CPU variable (sknid_elevator) carries the required
++ * state between xt_MARK and af_packet.c. As an optimization, this is
++ * only done for packets that first get dropped here. When we drop a
++ * packet we mark it for 'elevation' by storing -2. When xt_MARK tags
++ * the packet with the right slice, it intercepts this mark and sets
++ * sknid_elevator; the packet is then sent back here for a second
++ * round, this time with the xid tag set. */
++
++ int *elevator=&__get_cpu_var(sknid_elevator);
++ int tag = skb->skb_tag;
++
++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
++ if (skb->pkt_type==PACKET_HOST) {
++ *elevator=-2; /* Rejecting this packet. Mark it for elevation in xt_MARK */
++ }
++ return 0;
++ }
++ else if (!sk->sk_nx_info && (*elevator>0)) {
++ /* Socket without nx_info (root) already saw this packet before it was elevated */
++ return 0;
++ }
++
++ return 1;
++}
++
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct sock *sk;
struct sockaddr_pkt *spkt;
+ int tag = skb->skb_tag;
-+ int *elevator=&__get_cpu_var(sknid_elevator);
++
/*
* When we registered the protocol we saved the socket in the data
-@@ -269,6 +273,22 @@
+@@ -299,6 +343,16 @@ static int packet_rcv_spkt(struct sk_buf
* so that this procedure is noop.
*/
+ * (18:07:33) daniel_hozac: oh, that's evil.
+ */
+
-+ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
-+ *elevator=-2;
-+ goto out;
-+ }
-+ else if (!sk->sk_nx_info && *elevator) {
-+ /* Root has already seen this packet */
-+ goto out;
-+ }
++ if (!slice_check_and_elevate(skb, sk))
++ goto out; /* leave via the shared drop path, as the other early rejects do */
+
if (skb->pkt_type == PACKET_LOOPBACK)
goto out;
-@@ -324,6 +344,9 @@
+@@ -357,6 +411,9 @@ static int packet_sendmsg_spkt(struct ki
__be16 proto=0;
int err;
/*
* Get and verify the address.
*/
-@@ -420,6 +443,17 @@
+@@ -449,11 +506,16 @@ out_unlock:
+ return err;
+ }
+
++
++
+ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
unsigned int res)
{
struct sk_filter *filter;
-+ int tag = skb->skb_tag;
-+ int *elevator=&__get_cpu_var(sknid_elevator);
-+
-+ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
-+ *elevator=-2;
-+ return 0;
-+ }
-+ else if (!sk->sk_nx_info && *elevator) {
-+ /* Root has already seen this packet */
-+ return 0;
-+ }
++ if (!slice_check_and_elevate(skb, sk))
++ return 0;
++
rcu_read_lock_bh();
filter = rcu_dereference(sk->sk_filter);
-@@ -711,6 +745,9 @@
+ if (filter != NULL)
+@@ -773,6 +835,9 @@ static int packet_sendmsg(struct kiocb *
unsigned char *addr;
int ifindex, err, reserve = 0;
/*
* Get and verify the address.
*/
-@@ -984,8 +1021,9 @@
+@@ -939,6 +1004,7 @@ static int packet_do_bind(struct sock *s
+
+ po->num = protocol;
+ po->prot_hook.type = protocol;
++ po->prot_hook.sknid_elevator = 1;
+ po->prot_hook.dev = dev;
+
+ po->ifindex = dev ? dev->ifindex : 0;
+@@ -1037,8 +1103,9 @@ static int packet_create(struct net *net
__be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
return -ESOCKTNOSUPPORT;
+@@ -1069,6 +1136,7 @@ static int packet_create(struct net *net
+
+ spin_lock_init(&po->bind_lock);
+ po->prot_hook.func = packet_rcv;
++ po->prot_hook.sknid_elevator = 1;
+
+ if (sock->type == SOCK_PACKET)
+ po->prot_hook.func = packet_rcv_spkt;