Merge to Fedora kernel-2.6.7-1.492
diff --git a/net/core/dev.c b/net/core/dev.c
index bd0c3ad..0f47001 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/netpoll.h>
+#include <linux/rcupdate.h>
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
@@ -230,6 +231,8 @@ extern void netdev_unregister_sysfs(struct net_device *);
 #define        netdev_unregister_sysfs(dev)    do { } while(0)
 #endif
 
+/* netdump hook: installed by the netdump facility and called at crash
+ * time to dump state over the network */
+void (*netdump_func)(struct pt_regs *regs) = NULL;
 
 /*******************************************************************************
 
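A hedged sketch, not part of the patch: how a (hypothetical) netdump module
would install itself into the netdump_func hook added above, assuming the
symbol is exported to modules elsewhere in the patch set. All my_* names are
illustrative.

#include <linux/module.h>
#include <asm/ptrace.h>

extern void (*netdump_func)(struct pt_regs *regs);

static void my_netdump(struct pt_regs *regs)	/* hypothetical handler */
{
	/* ... transmit registers and memory pages over the network ... */
}

static int __init my_netdump_init(void)
{
	netdump_func = my_netdump;		/* arm the crash-time hook */
	return 0;
}

static void __exit my_netdump_exit(void)
{
	netdump_func = NULL;			/* disarm before unload */
}

module_init(my_netdump_init);
module_exit(my_netdump_exit);
MODULE_LICENSE("GPL");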
@@ -1305,6 +1308,20 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
        return 0;
 }
 
+#define HARD_TX_LOCK_BH(dev, cpu) {                    \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               spin_lock_bh(&dev->xmit_lock);          \
+               dev->xmit_lock_owner = cpu;             \
+       }                                               \
+}
+
+#define HARD_TX_UNLOCK_BH(dev) {                       \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               dev->xmit_lock_owner = -1;              \
+               spin_unlock_bh(&dev->xmit_lock);        \
+       }                                               \
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
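Drivers opt into the lockless path above by setting NETIF_F_LLTX and doing
their own TX serialization. A hedged sketch of the driver side (all my_*
names are illustrative, not from this patch):

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct my_priv {
	spinlock_t tx_lock;		/* driver-private TX lock */
};

static int my_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct my_priv *priv = dev->priv;
	unsigned long flags;

	/* The core skipped dev->xmit_lock because NETIF_F_LLTX is set,
	 * so serialize against our own TX-completion interrupt here. */
	spin_lock_irqsave(&priv->tx_lock, flags);
	/* ... hand skb to the hardware ring ... */
	spin_unlock_irqrestore(&priv->tx_lock, flags);
	return 0;
}

static void my_setup(struct net_device *dev)
{
	struct my_priv *priv = dev->priv;

	spin_lock_init(&priv->tx_lock);
	dev->features |= NETIF_F_LLTX;	/* "we lock for ourselves" */
	dev->hard_start_xmit = my_hard_start_xmit;
}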
@@ -1348,18 +1365,38 @@ int dev_queue_xmit(struct sk_buff *skb)
                if (skb_checksum_help(&skb, 0))
                        goto out_kfree_skb;
 
-       /* Grab device queue */
-       spin_lock_bh(&dev->queue_lock);
+       rcu_read_lock();
+       /* Updates of qdisc are serialized by queue_lock.
+        * The struct Qdisc that dev->qdisc points to is now an RCU
+        * structure: it may be read without taking a lock (though the
+        * pointer may be stale), and freeing it is deferred until it is
+        * known that no more references to it exist.
+        *
+        * If the qdisc has an enqueue function, we still need to
+        * hold queue_lock before calling it, since queue_lock also
+        * serializes access to the device queue.
+        */
+
        q = dev->qdisc;
+       smp_read_barrier_depends();
+#ifdef CONFIG_NET_CLS_ACT
+       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
+#endif
        if (q->enqueue) {
+               /* Grab device queue */
+               spin_lock_bh(&dev->queue_lock);
+
                rc = q->enqueue(skb, q);
 
                qdisc_run(dev);
 
                spin_unlock_bh(&dev->queue_lock);
+               rcu_read_unlock();
                rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
                goto out;
        }
+       rcu_read_unlock();
 
        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...
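The read side above pairs with an update side that publishes a new qdisc
under queue_lock and defers the free past an RCU grace period. A simplified
sketch of that pattern (the real code lives in net/sched/ and may use
call_rcu() rather than a synchronous wait):

#include <linux/rcupdate.h>
#include <net/pkt_sched.h>

static void qdisc_swap_sketch(struct net_device *dev, struct Qdisc *new)
{
	struct Qdisc *old;

	spin_lock_bh(&dev->queue_lock);
	old = dev->qdisc;
	smp_wmb();		/* pairs with smp_read_barrier_depends():
				 * 'new' must be fully initialized before
				 * readers can see the pointer */
	dev->qdisc = new;
	spin_unlock_bh(&dev->queue_lock);

	synchronize_kernel();	/* wait out all rcu_read_lock() readers */
	qdisc_destroy(old);	/* now safe: no reader still holds it */
}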
@@ -1374,18 +1411,12 @@ int dev_queue_xmit(struct sk_buff *skb)
           Either shot noqueue qdisc, it is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
-               int cpu = smp_processor_id();
+               int cpu = get_cpu();
 
                if (dev->xmit_lock_owner != cpu) {
-                       /*
-                        * The spin_lock effectivly does a preempt lock, but 
-                        * we are about to drop that...
-                        */
-                       preempt_disable();
-                       spin_unlock(&dev->queue_lock);
-                       spin_lock(&dev->xmit_lock);
-                       dev->xmit_lock_owner = cpu;
-                       preempt_enable();
+                       HARD_TX_LOCK_BH(dev, cpu);
+                       put_cpu();
 
                        if (!netif_queue_stopped(dev)) {
                                if (netdev_nit)
@@ -1393,18 +1424,17 @@ int dev_queue_xmit(struct sk_buff *skb)
 
                                rc = 0;
                                if (!dev->hard_start_xmit(skb, dev)) {
-                                       dev->xmit_lock_owner = -1;
-                                       spin_unlock_bh(&dev->xmit_lock);
+                                       HARD_TX_UNLOCK_BH(dev);
                                        goto out;
                                }
                        }
-                       dev->xmit_lock_owner = -1;
-                       spin_unlock_bh(&dev->xmit_lock);
+                       HARD_TX_UNLOCK_BH(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
                        goto out_enetdown;
                } else {
+                       put_cpu();
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
@@ -1412,7 +1442,6 @@ int dev_queue_xmit(struct sk_buff *skb)
                                       "%s, fix it urgently!\n", dev->name);
                }
        }
-       spin_unlock_bh(&dev->queue_lock);
 out_enetdown:
        rc = -ENETDOWN;
 out_kfree_skb:
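The switch from smp_processor_id() to get_cpu()/put_cpu() above closes a
preemption race: under CONFIG_PREEMPT the task could migrate right after
reading the CPU id, making the xmit_lock_owner comparison unreliable. A
minimal sketch of the idiom (the function name is illustrative):

#include <linux/smp.h>
#include <linux/netdevice.h>

static int xmit_recursion_sketch(struct net_device *dev)
{
	int cpu = get_cpu();	/* disables preemption, returns CPU id */
	int recursing = (dev->xmit_lock_owner == cpu);

	put_cpu();		/* re-enables preemption */
	return recursing;
}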
@@ -1575,7 +1604,7 @@ int netif_rx(struct sk_buff *skb)
        struct softnet_data *queue;
        unsigned long flags;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
@@ -1731,13 +1760,55 @@ static inline int __handle_bridge(struct sk_buff *skb,
        return 0;
 }
 
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * whenever CONFIG_NET_CLS_ACT is? As it stands we pay for a compare
+ * and two extra stores per packet when CONFIG_NET_CLS_ACT is enabled
+ * but the ingress scheduler is not.
+ * NOTE: This doesn't remove any functionality; without the ingress
+ * scheduler you simply cannot add policies on ingress.
+ */
+int ing_filter(struct sk_buff *skb)
+{
+       struct Qdisc *q;
+       struct net_device *dev = skb->dev;
+       int result = TC_ACT_OK;
+
+       if (dev->qdisc_ingress) {
+               __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+               if (ttl++ > MAX_RED_LOOP) {
+                       printk(KERN_WARNING "Redir loop detected, dropping packet (%s->%s)\n",
+                              skb->input_dev ? skb->input_dev->name : "??",
+                              skb->dev->name);
+                       return TC_ACT_SHOT;
+               }
+
+               skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+
+               skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+               if (skb->input_dev == NULL) {
+                       skb->input_dev = skb->dev;
+                       printk(KERN_DEBUG "ing_filter: fixed %s out %s\n",
+                              skb->input_dev->name, skb->dev->name);
+               }
+               spin_lock(&dev->ingress_lock);
+               if ((q = dev->qdisc_ingress) != NULL)
+                       result = q->enqueue(skb, q);
+               spin_unlock(&dev->ingress_lock);
+       }
+
+       return result;
+}
+#endif
+
 int netif_receive_skb(struct sk_buff *skb)
 {
        struct packet_type *ptype, *pt_prev;
        int ret = NET_RX_DROP;
        unsigned short type;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
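The ing_filter() helper added above takes dev->ingress_lock before
re-reading dev->qdisc_ingress; the attach side it pairs with (the grafting
logic in net/sched/sch_api.c, not shown in this patch) would swap the
pointer under the same lock. A hedged sketch:

#include <linux/netdevice.h>
#include <net/pkt_sched.h>

static struct Qdisc *ingress_graft_sketch(struct net_device *dev,
					  struct Qdisc *new)
{
	struct Qdisc *old;

	spin_lock_bh(&dev->ingress_lock);	/* _bh: process context */
	old = dev->qdisc_ingress;
	dev->qdisc_ingress = new;		/* NULL detaches the filter */
	spin_unlock_bh(&dev->ingress_lock);
	return old;				/* caller disposes of old */
}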
@@ -1762,6 +1833,14 @@ int netif_receive_skb(struct sk_buff *skb)
        skb->mac_len = skb->nh.raw - skb->mac.raw;
 
        pt_prev = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+       if (skb->tc_verd & TC_NCLS) {
+               skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+               rcu_read_lock();
+               goto ncls;
+       }
+#endif
+
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
@@ -1771,6 +1850,26 @@ int netif_receive_skb(struct sk_buff *skb)
                }
        }
 
+#ifdef CONFIG_NET_CLS_ACT
+       if (pt_prev) {
+               atomic_inc(&skb->users);
+               ret = pt_prev->func(skb, skb->dev, pt_prev);
+               pt_prev = NULL; /* no one else should process this afterwards */
+       } else {
+               skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+       }
+
+       ret = ing_filter(skb);
+
+       if (ret == TC_ACT_SHOT || ret == TC_ACT_STOLEN) {
+               kfree_skb(skb);
+               goto out;
+       }
+
+       skb->tc_verd = 0;
+ncls:
+#endif
+
        handle_diverter(skb);
 
        if (__handle_bridge(skb, &pt_prev, &ret))
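Condensed view of the CONFIG_NET_CLS_ACT receive path added above, as a
self-contained sketch: a packet re-injected by an action with TC_NCLS set
skips the taps and the ingress filter exactly once, then resumes normal
delivery. (The real function returns the raw verdict via its out label;
this sketch simplifies that to NET_RX_DROP.)

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/pkt_cls.h>

extern int ing_filter(struct sk_buff *skb);

static int receive_cls_act_sketch(struct sk_buff *skb)
{
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;		/* bypass taps + ing_filter once */
	}
	/* ... deliver to ptype_all taps here ... */
	switch (ing_filter(skb)) {
	case TC_ACT_SHOT:		/* a policer or action dropped it */
	case TC_ACT_STOLEN:		/* this version frees on STOLEN too */
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb->tc_verd = 0;		/* verdict consumed */
ncls:
	/* ... bridge hook and protocol demux continue ... */
	return NET_RX_SUCCESS;
}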
@@ -2830,6 +2929,10 @@ int register_netdevice(struct net_device *dev)
        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->xmit_lock);
        dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_CLS_ACT
+       spin_lock_init(&dev->ingress_lock);
+#endif
+
 #ifdef CONFIG_NET_FASTROUTE
        dev->fastpath_lock = RW_LOCK_UNLOCKED;
 #endif
@@ -3366,4 +3469,9 @@ EXPORT_SYMBOL(netdev_fastroute);
 EXPORT_SYMBOL(netdev_fastroute_obstacles);
 #endif
 
+#ifdef CONFIG_NET_CLS_ACT
+EXPORT_SYMBOL(ing_filter);
+#endif
+
 EXPORT_PER_CPU_SYMBOL(softnet_data);