Merge to Fedora kernel-2.6.7-1.492
diff --git a/net/core/dev.c b/net/core/dev.c
index bd0c3ad..0f47001 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/netpoll.h>
+#include <linux/rcupdate.h>
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
@@ -230,6 +231,8 @@ extern void netdev_unregister_sysfs(struct net_device *);
 #define        netdev_unregister_sysfs(dev)    do { } while(0)
 #endif
 
+/* netdump hook: installed by the netdump facility and called at crash
+ * time to dump state over the network */
+void (*netdump_func)(struct pt_regs *regs) = NULL;
 
 /*******************************************************************************
 
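A hedged sketch, not part of the patch: how a (hypothetical) netdump module
would install itself into the netdump_func hook added above, assuming the
symbol is exported to modules elsewhere in the patch set. All my_* names are
illustrative.

#include <linux/module.h>
#include <asm/ptrace.h>

extern void (*netdump_func)(struct pt_regs *regs);

static void my_netdump(struct pt_regs *regs)	/* hypothetical handler */
{
	/* ... transmit registers and memory pages over the network ... */
}

static int __init my_netdump_init(void)
{
	netdump_func = my_netdump;		/* arm the crash-time hook */
	return 0;
}

static void __exit my_netdump_exit(void)
{
	netdump_func = NULL;			/* disarm before unload */
}

module_init(my_netdump_init);
module_exit(my_netdump_exit);
MODULE_LICENSE("GPL");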
@@ -1305,6 +1308,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
        return 0;
 }
 
+#define HARD_TX_LOCK_BH(dev, cpu) {                    \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               spin_lock_bh(&dev->xmit_lock);          \
+               dev->xmit_lock_owner = cpu;             \
+       }                                               \
+}
+
+#define HARD_TX_UNLOCK_BH(dev) {                       \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               dev->xmit_lock_owner = -1;              \
+               spin_unlock_bh(&dev->xmit_lock);        \
+       }                                               \
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
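Drivers opt into the lockless path above by setting NETIF_F_LLTX and doing
their own TX serialization. A hedged sketch of the driver side (all my_*
names are illustrative, not from this patch):

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct my_priv {
	spinlock_t tx_lock;		/* driver-private TX lock */
};

static int my_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct my_priv *priv = dev->priv;
	unsigned long flags;

	/* The core skipped dev->xmit_lock because NETIF_F_LLTX is set,
	 * so serialize against our own TX-completion interrupt here. */
	spin_lock_irqsave(&priv->tx_lock, flags);
	/* ... hand skb to the hardware ring ... */
	spin_unlock_irqrestore(&priv->tx_lock, flags);
	return 0;
}

static void my_setup(struct net_device *dev)
{
	struct my_priv *priv = dev->priv;

	spin_lock_init(&priv->tx_lock);
	dev->features |= NETIF_F_LLTX;	/* "we lock for ourselves" */
	dev->hard_start_xmit = my_hard_start_xmit;
}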
@@ -1348,18 +1365,38 @@ int dev_queue_xmit(struct sk_buff *skb)
                if (skb_checksum_help(&skb, 0))
                        goto out_kfree_skb;
 
-       /* Grab device queue */
-       spin_lock_bh(&dev->queue_lock);
+       rcu_read_lock();
+       /* Updates of qdisc are serialized by queue_lock.
+        * The struct Qdisc that dev->qdisc points to is now an RCU
+        * structure: it may be read without taking a lock (though the
+        * pointer may be stale), and freeing it is deferred until it is
+        * known that no more references to it exist.
+        *
+        * If the qdisc has an enqueue function, we still need to
+        * hold queue_lock before calling it, since queue_lock also
+        * serializes access to the device queue.
+        */
+
        q = dev->qdisc;
+       smp_read_barrier_depends();
+#ifdef CONFIG_NET_CLS_ACT
+       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+#endif
        if (q->enqueue) {
+               /* Grab device queue */
+               spin_lock_bh(&dev->queue_lock);
+
                rc = q->enqueue(skb, q);
 
                qdisc_run(dev);
 
                spin_unlock_bh(&dev->queue_lock);
+               rcu_read_unlock();
                rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
                goto out;
        }
+       rcu_read_unlock();
 
        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...
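The read side above pairs with an update side that publishes a new qdisc
under queue_lock and defers the free past an RCU grace period. A simplified
sketch of that pattern (the real code lives in net/sched/ and may use
call_rcu() rather than a synchronous wait):

#include <linux/rcupdate.h>
#include <net/pkt_sched.h>

static void qdisc_swap_sketch(struct net_device *dev, struct Qdisc *new)
{
	struct Qdisc *old;

	spin_lock_bh(&dev->queue_lock);
	old = dev->qdisc;
	smp_wmb();		/* pairs with smp_read_barrier_depends():
				 * 'new' must be fully initialized before
				 * readers can see the pointer */
	dev->qdisc = new;
	spin_unlock_bh(&dev->queue_lock);

	synchronize_kernel();	/* wait out all rcu_read_lock() readers */
	qdisc_destroy(old);	/* now safe: no reader still holds it */
}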
@@ -1374,18 +1411,12 @@ int dev_queue_xmit(struct sk_buff *skb)
           Either shot noqueue qdisc, it is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
-               int cpu = smp_processor_id();
+               int cpu = get_cpu();
 
                if (dev->xmit_lock_owner != cpu) {
-                       /*
-                        * The spin_lock effectivly does a preempt lock, but 
-                        * we are about to drop that...
-                        */
-                       preempt_disable();
-                       spin_unlock(&dev->queue_lock);
-                       spin_lock(&dev->xmit_lock);
-                       dev->xmit_lock_owner = cpu;
-                       preempt_enable();
+                       HARD_TX_LOCK_BH(dev, cpu);
+                       put_cpu();
 
                        if (!netif_queue_stopped(dev)) {
                                if (netdev_nit)
@@ -1393,18 +1424,17 @@ int dev_queue_xmit(struct sk_buff *skb)
 
                                rc = 0;
                                if (!dev->hard_start_xmit(skb, dev)) {
-                                       dev->xmit_lock_owner = -1;
-                                       spin_unlock_bh(&dev->xmit_lock);
+                                       HARD_TX_UNLOCK_BH(dev);
                                        goto out;
                                }
                        }
-                       dev->xmit_lock_owner = -1;
-                       spin_unlock_bh(&dev->xmit_lock);
+                       HARD_TX_UNLOCK_BH(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
                        goto out_enetdown;
                } else {
+                       put_cpu();
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
@@ -1412,7 +1442,6 @@ int dev_queue_xmit(struct sk_buff *skb)
                                       "%s, fix it urgently!\n", dev->name);
                }
        }
-       spin_unlock_bh(&dev->queue_lock);
 out_enetdown:
        rc = -ENETDOWN;
 out_kfree_skb:
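The switch from smp_processor_id() to get_cpu()/put_cpu() above closes a
preemption race: under CONFIG_PREEMPT the task could migrate right after
reading the CPU id, making the xmit_lock_owner comparison unreliable. A
minimal sketch of the idiom (the function name is illustrative):

#include <linux/smp.h>
#include <linux/netdevice.h>

static int xmit_recursion_sketch(struct net_device *dev)
{
	int cpu = get_cpu();	/* disables preemption, returns CPU id */
	int recursing = (dev->xmit_lock_owner == cpu);

	put_cpu();		/* re-enables preemption */
	return recursing;
}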
@@ -1575,7 +1604,7 @@ int netif_rx(struct sk_buff *skb)
        struct softnet_data *queue;
        unsigned long flags;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
@@ -1731,13 +1760,55 @@ static inline int __handle_bridge(struct sk_buff *skb,
        return 0;
 }
 
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * whenever CONFIG_NET_CLS_ACT is? As it stands we pay for a compare
+ * and two extra stores per packet when CONFIG_NET_CLS_ACT is enabled
+ * but the ingress scheduler is not.
+ * NOTE: This doesn't remove any functionality; without the ingress
+ * scheduler you simply cannot add policies on ingress.
+ */
+int ing_filter(struct sk_buff *skb)
+{
+       struct Qdisc *q;
+       struct net_device *dev = skb->dev;
+       int result = TC_ACT_OK;
+
+       if (dev->qdisc_ingress) {
+               __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+               if (ttl++ > MAX_RED_LOOP) {
+                       printk(KERN_WARNING "Redir loop detected, dropping packet (%s->%s)\n",
+                              skb->input_dev ? skb->input_dev->name : "??",
+                              skb->dev->name);
+                       return TC_ACT_SHOT;
+               }
+
+               skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+
+               skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+               if (skb->input_dev == NULL) {
+                       skb->input_dev = skb->dev;
+                       printk(KERN_DEBUG "ing_filter: fixed %s out %s\n",
+                              skb->input_dev->name, skb->dev->name);
+               }
+               spin_lock(&dev->ingress_lock);
+               if ((q = dev->qdisc_ingress) != NULL)
+                       result = q->enqueue(skb, q);
+               spin_unlock(&dev->ingress_lock);
+       }
+
+       return result;
+}
+#endif
+
 int netif_receive_skb(struct sk_buff *skb)
 {
        struct packet_type *ptype, *pt_prev;
        int ret = NET_RX_DROP;
        unsigned short type;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
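The ing_filter() helper added above takes dev->ingress_lock before
re-reading dev->qdisc_ingress; the attach side it pairs with (the grafting
logic in net/sched/sch_api.c, not shown in this patch) would swap the
pointer under the same lock. A hedged sketch:

#include <linux/netdevice.h>
#include <net/pkt_sched.h>

static struct Qdisc *ingress_graft_sketch(struct net_device *dev,
					  struct Qdisc *new)
{
	struct Qdisc *old;

	spin_lock_bh(&dev->ingress_lock);	/* _bh: process context */
	old = dev->qdisc_ingress;
	dev->qdisc_ingress = new;		/* NULL detaches the filter */
	spin_unlock_bh(&dev->ingress_lock);
	return old;				/* caller disposes of old */
}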
@@ -1762,6 +1833,14 @@ int netif_receive_skb(struct sk_buff *skb)
        skb->mac_len = skb->nh.raw - skb->mac.raw;
 
        pt_prev = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+       if (skb->tc_verd & TC_NCLS) {
+               skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+               rcu_read_lock();
+               goto ncls;
+       }
+#endif
+
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
@@ -1771,6 +1850,26 @@ int netif_receive_skb(struct sk_buff *skb)
                }
        }
 
+#ifdef CONFIG_NET_CLS_ACT
+       if (pt_prev) {
+               atomic_inc(&skb->users);
+               ret = pt_prev->func(skb, skb->dev, pt_prev);
+               pt_prev = NULL; /* no one else should process this afterwards */
+       } else {
+               skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+       }
+
+       ret = ing_filter(skb);
+
+       if (ret == TC_ACT_SHOT || ret == TC_ACT_STOLEN) {
+               kfree_skb(skb);
+               goto out;
+       }
+
+       skb->tc_verd = 0;
+ncls:
+#endif
+
        handle_diverter(skb);
 
        if (__handle_bridge(skb, &pt_prev, &ret))
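Condensed view of the CONFIG_NET_CLS_ACT receive path added above, as a
self-contained sketch: a packet re-injected by an action with TC_NCLS set
skips the taps and the ingress filter exactly once, then resumes normal
delivery. (The real function returns the raw verdict via its out label;
this sketch simplifies that to NET_RX_DROP.)

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/pkt_cls.h>

extern int ing_filter(struct sk_buff *skb);

static int receive_cls_act_sketch(struct sk_buff *skb)
{
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;		/* bypass taps + ing_filter once */
	}
	/* ... deliver to ptype_all taps here ... */
	switch (ing_filter(skb)) {
	case TC_ACT_SHOT:		/* a policer or action dropped it */
	case TC_ACT_STOLEN:		/* this version frees on STOLEN too */
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb->tc_verd = 0;		/* verdict consumed */
ncls:
	/* ... bridge hook and protocol demux continue ... */
	return NET_RX_SUCCESS;
}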
@@ -2830,6 +2929,10 @@ int register_netdevice(struct net_device *dev)
        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->xmit_lock);
        dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_CLS_ACT
+       spin_lock_init(&dev->ingress_lock);
+#endif
+
 #ifdef CONFIG_NET_FASTROUTE
        dev->fastpath_lock = RW_LOCK_UNLOCKED;
 #endif
@@ -3366,4 +3469,9 @@ EXPORT_SYMBOL(netdev_fastroute);
 EXPORT_SYMBOL(netdev_fastroute_obstacles);
 #endif
 
+#ifdef CONFIG_NET_CLS_ACT
+EXPORT_SYMBOL(ing_filter);
+#endif
+
 EXPORT_PER_CPU_SYMBOL(softnet_data);