Optimize packet socket support to eliminate a packet copy.
author Sapan Bhatia <sapanb@cs.princeton.edu>
Mon, 28 Jul 2008 13:24:55 +0000 (13:24 +0000)
committer Sapan Bhatia <sapanb@cs.princeton.edu>
Mon, 28 Jul 2008 13:24:55 +0000 (13:24 +0000)
linux-2.6-525-sknid-elevator.patch

index e2208a6..1b42d92 100644 (file)
@@ -11,7 +11,7 @@ diff -Nurb linux-2.6.22-524/include/linux/netdevice.h linux-2.6.22-525/include/l
                                         struct packet_type *,
 diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
 --- linux-2.6.22-524/net/core/dev.c    2008-07-27 22:06:20.000000000 -0400
-+++ linux-2.6.22-525/net/core/dev.c    2008-07-27 22:06:27.000000000 -0400
++++ linux-2.6.22-525/net/core/dev.c    2008-07-28 09:26:45.000000000 -0400
 @@ -97,6 +97,8 @@
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
@@ -38,7 +38,7 @@ diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
  static int ing_filter(struct sk_buff *skb)
  {
        struct Qdisc *q;
-@@ -1832,13 +1835,21 @@
+@@ -1832,13 +1835,20 @@
  }
  #endif
  
@@ -52,7 +52,6 @@ diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
        struct net_device *orig_dev;
        int ret = NET_RX_DROP;
 +      int *cur_elevator=&__get_cpu_var(sknid_elevator);
-+      struct sk_buff *skb2;
        __be16 type;
  
 +      *cur_elevator = 0;
@@ -60,11 +59,8 @@ diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
        /* if we've gotten here through NAPI, check netpoll */
        if (skb->dev->poll && netpoll_rx(skb))
                return NET_RX_DROP;
-@@ -1871,10 +1882,12 @@
-       }
- #endif
+@@ -1873,8 +1883,9 @@
  
-+      skb2 = skb_clone(skb, GFP_ATOMIC);
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
 -                      if (pt_prev)
@@ -74,72 +70,35 @@ diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
                        pt_prev = ptype;
                }
        }
-@@ -1891,6 +1904,7 @@
-       if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
-               kfree_skb(skb);
-+              kfree_skb(skb2);
-               goto out;
+@@ -1913,7 +1924,27 @@
        }
  
-@@ -1899,8 +1913,17 @@
- #endif
-       skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
--      if (!skb)
-+      if (!skb) {
-+              kfree_skb(skb2);
-               goto out;
-+      }
-+
-+      /* We don't want the packet handlers to throw the packet away
-+       * if we want the taps to treat it again - Sapan */
-+      if (!skb2) {
-+              *cur_elevator = 0;
-+      }
-+
-       type = skb->protocol;
-       list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
-@@ -1914,6 +1937,7 @@
        if (pt_prev) {
-               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-+              
-       } else {
-               kfree_skb(skb);
-               /* Jamal, now you will not able to escape explaining
-@@ -1922,6 +1946,29 @@
-               ret = NET_RX_DROP;
-       }
-+      if ((*cur_elevator)>0) {
-+              skb2->skb_tag = *cur_elevator;
-+              list_for_each_entry_rcu(ptype, &ptype_all, list) {
-+                      if ((!ptype->dev || ptype->dev == skb2->dev) && (ptype->sknid_elevator)) {
-+                              ret = deliver_skb(skb2, ptype, orig_dev);
-+                      }
++              /* At this point, cur_elevator may be -2 or a positive value, in
++               * case a previous protocol handler marked it */
++              if (*cur_elevator) {
++                      atomic_inc(&skb->users);
 +              }
-+              type = skb2->protocol;
-+              list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
-+                      if (ptype->type == type &&
-+                          (!ptype->dev || ptype->dev == skb2->dev) && (ptype->sknid_elevator)) {
-+                              ret = deliver_skb(skb2, ptype, orig_dev);
++              
+               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++
++              if ((*cur_elevator)>0) {
++                      skb->skb_tag = *cur_elevator;
++                      list_for_each_entry_rcu(ptype, &ptype_all, list) {
++                              if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) {
++                                      ret = deliver_skb(skb, ptype, orig_dev);
++                              }
 +                      }
 +              }
-+      }
-+
-+      if (skb2) {
-+              /* We have a packet */
-+              kfree_skb(skb2);
-+      }
-+
-+      *cur_elevator=0;
 +
- out:
-       rcu_read_unlock();
-       return ret;
-@@ -3780,6 +3827,7 @@
++              if (*cur_elevator) {
++                      /* We have a packet */
++                      kfree_skb(skb);
++              }
+       } else {
+               kfree_skb(skb);
+               /* Jamal, now you will not able to escape explaining
+@@ -3780,6 +3811,7 @@
  EXPORT_SYMBOL(net_enable_timestamp);
  EXPORT_SYMBOL(net_disable_timestamp);
  EXPORT_SYMBOL(dev_get_flags);