vserver 1.9.3
[linux-2.6.git] net/core/dev.c
index 9456559..65b3541 100644
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/netpoll.h>
+#include <linux/rcupdate.h>
 #ifdef CONFIG_NET_RADIO
 #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
 #endif /* CONFIG_NET_RADIO */
 #include <asm/current.h>
+#include <linux/vs_base.h>
+#include <linux/vs_network.h>
 
 /* This define, if set, will randomly drop a packet when congestion
  * is more than moderate.  It helps fairness in the multi-interface
@@ -215,11 +218,6 @@ static struct notifier_block *netdev_chain;
  */
 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
 
-#ifdef CONFIG_NET_FASTROUTE
-int netdev_fastroute;
-int netdev_fastroute_obstacles;
-#endif
-
 #ifdef CONFIG_SYSFS
 extern int netdev_sysfs_init(void);
 extern int netdev_register_sysfs(struct net_device *);
@@ -277,12 +275,6 @@ void dev_add_pack(struct packet_type *pt)
        int hash;
 
        spin_lock_bh(&ptype_lock);
-#ifdef CONFIG_NET_FASTROUTE
-       if (pt->af_packet_priv) {
-               netdev_fastroute_obstacles++;
-               dev_clear_fastroute(pt->dev);
-       }
-#endif
        if (pt->type == htons(ETH_P_ALL)) {
                netdev_nit++;
                list_add_rcu(&pt->list, &ptype_all);
@@ -325,10 +317,6 @@ void __dev_remove_pack(struct packet_type *pt)
 
        list_for_each_entry(pt1, head, list) {
                if (pt == pt1) {
-#ifdef CONFIG_NET_FASTROUTE
-                       if (pt->af_packet_priv)
-                               netdev_fastroute_obstacles--;
-#endif
                        list_del_rcu(&pt->list);
                        goto out;
                }
@@ -875,18 +863,6 @@ static int default_rebuild_header(struct sk_buff *skb)
 }
 
 
-/*
- * Some old buggy device drivers change get_stats after registering
- * the device.  Try and trap them here.
- * This can be elimnated when all devices are known fixed.
- */
-static inline int get_stats_changed(struct net_device *dev)
-{
-       int changed = dev->last_stats != dev->get_stats;
-       dev->last_stats = dev->get_stats;
-       return changed;
-}
-
 /**
  *     dev_open        - prepare an interface for use.
  *     @dev:   device to open
@@ -910,14 +886,6 @@ int dev_open(struct net_device *dev)
        if (dev->flags & IFF_UP)
                return 0;
 
-       /*
-        *       Check for broken device drivers.
-        */
-       if (get_stats_changed(dev) && net_ratelimit()) {
-               printk(KERN_ERR "%s: driver changed get_stats after register\n",
-                      dev->name);
-       }
-
        /*
         *      Is it even present?
         */
@@ -934,14 +902,6 @@ int dev_open(struct net_device *dev)
                        clear_bit(__LINK_STATE_START, &dev->state);
        }
 
-       /*
-        *      Check for more broken device drivers.
-        */
-       if (get_stats_changed(dev) && net_ratelimit()) {
-               printk(KERN_ERR "%s: driver changed get_stats in open\n",
-                      dev->name);
-       }
-
        /*
         *      If it went open OK then:
         */
@@ -970,39 +930,6 @@ int dev_open(struct net_device *dev)
        return ret;
 }
 
-#ifdef CONFIG_NET_FASTROUTE
-
-static void dev_do_clear_fastroute(struct net_device *dev)
-{
-       if (dev->accept_fastpath) {
-               int i;
-
-               for (i = 0; i <= NETDEV_FASTROUTE_HMASK; i++) {
-                       struct dst_entry *dst;
-
-                       write_lock_irq(&dev->fastpath_lock);
-                       dst = dev->fastpath[i];
-                       dev->fastpath[i] = NULL;
-                       write_unlock_irq(&dev->fastpath_lock);
-
-                       dst_release(dst);
-               }
-       }
-}
-
-void dev_clear_fastroute(struct net_device *dev)
-{
-       if (dev) {
-               dev_do_clear_fastroute(dev);
-       } else {
-               read_lock(&dev_base_lock);
-               for (dev = dev_base; dev; dev = dev->next)
-                       dev_do_clear_fastroute(dev);
-               read_unlock(&dev_base_lock);
-       }
-}
-#endif
-
 /**
  *     dev_close - shutdown an interface.
  *     @dev: device to shutdown
@@ -1055,9 +982,6 @@ int dev_close(struct net_device *dev)
         */
 
        dev->flags &= ~IFF_UP;
-#ifdef CONFIG_NET_FASTROUTE
-       dev_clear_fastroute(dev);
-#endif
 
        /*
         * Tell people we are down
@@ -1194,16 +1118,10 @@ int skb_checksum_help(struct sk_buff **pskb, int inward)
                goto out;
        }
 
-       if (skb_shared(*pskb)  || skb_cloned(*pskb)) {
-               struct sk_buff *newskb = skb_copy(*pskb, GFP_ATOMIC);
-               if (!newskb) {
-                       ret = -ENOMEM;
+       if (skb_cloned(*pskb)) {
+               ret = pskb_expand_head(*pskb, 0, 0, GFP_ATOMIC);
+               if (ret)
                        goto out;
-               }
-               if ((*pskb)->sk)
-                       skb_set_owner_w(newskb, (*pskb)->sk);
-               kfree_skb(*pskb);
-               *pskb = newskb;
        }
 
        if (offset > (int)(*pskb)->len)
@@ -1305,6 +1223,27 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
        return 0;
 }
 
+#define HARD_TX_LOCK(dev, cpu) {                       \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               spin_lock(&dev->xmit_lock);             \
+               dev->xmit_lock_owner = cpu;             \
+       }                                               \
+}
+
+#define HARD_TX_UNLOCK(dev) {                          \
+       if ((dev->features & NETIF_F_LLTX) == 0) {      \
+               dev->xmit_lock_owner = -1;              \
+               spin_unlock(&dev->xmit_lock);           \
+       }                                               \
+}
+
+static inline void qdisc_run(struct net_device *dev)
+{
+       while (!netif_queue_stopped(dev) &&
+              qdisc_restart(dev)<0)
+               /* NOTHING */;
+}
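
HARD_TX_LOCK/HARD_TX_UNLOCK only take dev->xmit_lock for drivers that do not advertise NETIF_F_LLTX; a lockless-TX driver is expected to do its own, finer-grained serialization inside hard_start_xmit. Below is a minimal driver-side sketch of that contract -- illustrative only, not part of the patch; the example_* names and the private tx_lock are hypothetical.

        /* Hypothetical LLTX driver: dev->features |= NETIF_F_LLTX was set at
         * probe time, so the core calls hard_start_xmit() without holding
         * dev->xmit_lock and the driver serializes transmits itself.
         */
        struct example_priv {
                spinlock_t      tx_lock;        /* driver-private TX lock */
                /* ... ring pointers, stats, ... */
        };

        static int example_hard_start_xmit(struct sk_buff *skb,
                                           struct net_device *dev)
        {
                struct example_priv *priv = dev->priv;

                spin_lock(&priv->tx_lock);      /* BHs are already off here */
                /* ... post skb to the hardware TX ring ... */
                spin_unlock(&priv->tx_lock);

                return 0;                       /* 0 == accepted for transmission */
        }
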
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -1348,15 +1287,37 @@ int dev_queue_xmit(struct sk_buff *skb)
                if (skb_checksum_help(&skb, 0))
                        goto out_kfree_skb;
 
-       /* Grab device queue */
-       spin_lock_bh(&dev->queue_lock);
-       q = dev->qdisc;
+
+       /* Disable soft irqs for various locks below. Also 
+        * stops preemption for RCU. 
+        */
+       local_bh_disable(); 
+
+       /* Updates of qdisc are serialized by queue_lock.
+        * The struct Qdisc pointed to by dev->qdisc is now an
+        * RCU-protected structure: it may be dereferenced without
+        * taking a lock (though the value read may be stale), and
+        * freeing of a qdisc is deferred until it is known that no
+        * more references to it remain.
+        *
+        * If the qdisc has an enqueue function, we still need to
+        * hold queue_lock before calling it, since queue_lock also
+        * serializes access to the device queue.
+        */
+
+       q = rcu_dereference(dev->qdisc);
+#ifdef CONFIG_NET_CLS_ACT
+       skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+#endif
        if (q->enqueue) {
+               /* Grab device queue */
+               spin_lock(&dev->queue_lock);
+
                rc = q->enqueue(skb, q);
 
                qdisc_run(dev);
 
-               spin_unlock_bh(&dev->queue_lock);
+               spin_unlock(&dev->queue_lock);
                rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
                goto out;
        }
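
The comment in this hunk describes the reader side: dev->qdisc is picked up with rcu_dereference() and no lock, and queue_lock is only taken around ->enqueue. For context, here is a minimal sketch of the matching writer-side discipline -- illustrative only, not part of the patch; the example_* names and the q_rcu field name are assumptions, and it presumes rcu_assign_pointer()/call_rcu() from the generic RCU API are available alongside rcu_dereference().

        /* Replace dev->qdisc under queue_lock and defer freeing the old
         * qdisc until all RCU readers that may still hold a reference
         * to it have left their read-side sections.
         */
        static void example_qdisc_rcu_free(struct rcu_head *head)
        {
                struct Qdisc *q = container_of(head, struct Qdisc, q_rcu);

                kfree(q);                       /* simplified teardown */
        }

        static void example_qdisc_replace(struct net_device *dev,
                                          struct Qdisc *new)
        {
                struct Qdisc *old;

                spin_lock_bh(&dev->queue_lock); /* serializes updaters */
                old = dev->qdisc;
                rcu_assign_pointer(dev->qdisc, new);
                spin_unlock_bh(&dev->queue_lock);

                call_rcu(&old->q_rcu, example_qdisc_rcu_free);
        }
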
@@ -1374,18 +1335,11 @@ int dev_queue_xmit(struct sk_buff *skb)
           Either shot noqueue qdisc, it is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
-               int cpu = smp_processor_id();
+               int cpu = smp_processor_id(); /* ok because BHs are off */
 
                if (dev->xmit_lock_owner != cpu) {
-                       /*
-                        * The spin_lock effectivly does a preempt lock, but 
-                        * we are about to drop that...
-                        */
-                       preempt_disable();
-                       spin_unlock(&dev->queue_lock);
-                       spin_lock(&dev->xmit_lock);
-                       dev->xmit_lock_owner = cpu;
-                       preempt_enable();
+
+                       HARD_TX_LOCK(dev, cpu);
 
                        if (!netif_queue_stopped(dev)) {
                                if (netdev_nit)
@@ -1393,13 +1347,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 
                                rc = 0;
                                if (!dev->hard_start_xmit(skb, dev)) {
-                                       dev->xmit_lock_owner = -1;
-                                       spin_unlock_bh(&dev->xmit_lock);
+                                       HARD_TX_UNLOCK(dev);
                                        goto out;
                                }
                        }
-                       dev->xmit_lock_owner = -1;
-                       spin_unlock_bh(&dev->xmit_lock);
+                       HARD_TX_UNLOCK(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
@@ -1412,12 +1364,12 @@ int dev_queue_xmit(struct sk_buff *skb)
                                       "%s, fix it urgently!\n", dev->name);
                }
        }
-       spin_unlock_bh(&dev->queue_lock);
 out_enetdown:
        rc = -ENETDOWN;
 out_kfree_skb:
        kfree_skb(skb);
 out:
+       local_bh_enable();
        return rc;
 }
 
@@ -1575,7 +1527,7 @@ int netif_rx(struct sk_buff *skb)
        struct softnet_data *queue;
        unsigned long flags;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
@@ -1694,42 +1646,75 @@ static void net_tx_action(struct softirq_action *h)
 }
 
 static __inline__ int deliver_skb(struct sk_buff *skb,
-                                 struct packet_type *pt_prev, int last)
+                                 struct packet_type *pt_prev)
 {
        atomic_inc(&skb->users);
        return pt_prev->func(skb, skb->dev, pt_prev);
 }
 
-
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct sk_buff *skb);
+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
 
-static __inline__ int handle_bridge(struct sk_buff *skb,
-                                    struct packet_type *pt_prev)
+static __inline__ int handle_bridge(struct sk_buff **pskb,
+                                   struct packet_type **pt_prev, int *ret)
 {
-       int ret = NET_RX_DROP;
-       if (pt_prev)
-               ret = deliver_skb(skb, pt_prev, 0);
+       struct net_bridge_port *port;
 
-       return ret;
-}
+       if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
+           (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
+               return 0;
 
+       if (*pt_prev) {
+               *ret = deliver_skb(*pskb, *pt_prev);
+               *pt_prev = NULL;
+       } 
+       
+       return br_handle_frame_hook(port, pskb);
+}
+#else
+#define handle_bridge(skb, pt_prev, ret)       (0)
 #endif
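
The reworked bridge hook now receives the ingress port and an skb pointer-to-pointer, and a nonzero return tells netif_receive_skb() that the bridge took ownership of the buffer. A minimal module-side sketch of that contract follows -- illustrative only, not part of the patch; everything except br_handle_frame_hook, struct net_bridge_port and struct sk_buff is hypothetical.

        /* Hypothetical consumer of the reworked hook: return nonzero after
         * taking ownership of the skb (forwarded or dropped), return 0 to
         * let normal protocol delivery continue.
         */
        static int example_bridge_rx(struct net_bridge_port *p,
                                     struct sk_buff **pskb)
        {
                struct sk_buff *skb = *pskb;

                if (!(p->dev->flags & IFF_UP)) {
                        kfree_skb(skb);         /* consumed: dropped */
                        return 1;
                }
                return 0;                       /* not ours: pass it up the stack */
        }

        static int __init example_bridge_init(void)
        {
                br_handle_frame_hook = example_bridge_rx;
                return 0;
        }
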
 
-static inline int __handle_bridge(struct sk_buff *skb,
-                       struct packet_type **pt_prev, int *ret)
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for a few useless
+ * instructions (a compare and two extra stores) when the ingress
+ * scheduler is not configured but CONFIG_NET_CLS_ACT is.
+ *
+ * NOTE: This doesn't remove any functionality; without the ingress
+ * scheduler you just can't add policies on ingress.
+ */
+int ing_filter(struct sk_buff *skb) 
 {
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-       if (skb->dev->br_port && skb->pkt_type != PACKET_LOOPBACK) {
-               *ret = handle_bridge(skb, *pt_prev);
-               if (br_handle_frame_hook(skb) == 0)
-                       return 1;
+       struct Qdisc *q;
+       struct net_device *dev = skb->dev;
+       int result = TC_ACT_OK;
+       
+       if (dev->qdisc_ingress) {
+               __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+               if (MAX_RED_LOOP < ttl++) {
+                       printk("Redir loop detected Dropping packet (%s->%s)\n",
+                               skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+                       return TC_ACT_SHOT;
+               }
+
+               skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+
+               skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+               if (NULL == skb->input_dev) {
+                       skb->input_dev = skb->dev;
+                       printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
+               }
+               spin_lock(&dev->ingress_lock);
+               if ((q = dev->qdisc_ingress) != NULL)
+                       result = q->enqueue(skb, q);
+               spin_unlock(&dev->ingress_lock);
 
-               *pt_prev = NULL;
        }
-#endif
-       return 0;
+
+       return result;
 }
+#endif
 
 int netif_receive_skb(struct sk_buff *skb)
 {
@@ -1737,7 +1722,7 @@ int netif_receive_skb(struct sk_buff *skb)
        int ret = NET_RX_DROP;
        unsigned short type;
 
-#ifdef CONFIG_NETPOLL_RX
+#ifdef CONFIG_NETPOLL
        if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
                kfree_skb(skb);
                return NET_RX_DROP;
@@ -1751,29 +1736,50 @@ int netif_receive_skb(struct sk_buff *skb)
 
        __get_cpu_var(netdev_rx_stat).total++;
 
-#ifdef CONFIG_NET_FASTROUTE
-       if (skb->pkt_type == PACKET_FASTROUTE) {
-               __get_cpu_var(netdev_rx_stat).fastroute_deferred_out++;
-               return dev_queue_xmit(skb);
-       }
-#endif
-
        skb->h.raw = skb->nh.raw = skb->data;
        skb->mac_len = skb->nh.raw - skb->mac.raw;
 
        pt_prev = NULL;
+
        rcu_read_lock();
+
+#ifdef CONFIG_NET_CLS_ACT
+       if (skb->tc_verd & TC_NCLS) {
+               skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+               goto ncls;
+       }
+#endif
+
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev, 0);
+                               ret = deliver_skb(skb, pt_prev);
                        pt_prev = ptype;
                }
        }
 
+#ifdef CONFIG_NET_CLS_ACT
+       if (pt_prev) {
+               ret = deliver_skb(skb, pt_prev);
+               pt_prev = NULL; /* no one else should process this afterwards */
+       } else {
+               skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+       }
+
+       ret = ing_filter(skb);
+
+       if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
+               kfree_skb(skb);
+               goto out;
+       }
+
+       skb->tc_verd = 0;
+ncls:
+#endif
+
        handle_diverter(skb);
 
-       if (__handle_bridge(skb, &pt_prev, &ret))
+       if (handle_bridge(&skb, &pt_prev, &ret))
                goto out;
 
        type = skb->protocol;
@@ -1781,7 +1787,7 @@ int netif_receive_skb(struct sk_buff *skb)
                if (ptype->type == type &&
                    (!ptype->dev || ptype->dev == skb->dev)) {
                        if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev, 0);
+                               ret = deliver_skb(skb, pt_prev);
                        pt_prev = ptype;
                }
        }
@@ -1996,7 +2002,8 @@ static int dev_ifconf(char __user *arg)
 
        total = 0;
        for (dev = dev_base; dev; dev = dev->next) {
-               if (!dev_in_nx_info(dev, current->nx_info))
+               if (vx_flags(VXF_HIDE_NETIF, 0) &&
+                       !dev_in_nx_info(dev, current->nx_info))
                        continue;
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
@@ -2060,7 +2067,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
        struct nx_info *nxi = current->nx_info;
 
-       if (!dev_in_nx_info(dev, nxi))
+       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
                return;
        if (dev->get_stats) {
                struct net_device_stats *stats = dev->get_stats(dev);
@@ -2275,13 +2282,6 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
        if ((dev->promiscuity += inc) == 0)
                dev->flags &= ~IFF_PROMISC;
        if (dev->flags ^ old_flags) {
-#ifdef CONFIG_NET_FASTROUTE
-               if (dev->flags & IFF_PROMISC) {
-                       netdev_fastroute_obstacles++;
-                       dev_clear_fastroute(dev);
-               } else
-                       netdev_fastroute_obstacles--;
-#endif
                dev_mc_upload(dev);
                printk(KERN_INFO "device %s %s promiscuous mode\n",
                       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -2830,8 +2830,8 @@ int register_netdevice(struct net_device *dev)
        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->xmit_lock);
        dev->xmit_lock_owner = -1;
-#ifdef CONFIG_NET_FASTROUTE
-       dev->fastpath_lock = RW_LOCK_UNLOCKED;
+#ifdef CONFIG_NET_CLS_ACT
+       spin_lock_init(&dev->ingress_lock);
 #endif
 
        ret = alloc_divert_blk(dev);
@@ -2939,7 +2939,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
        while (atomic_read(&dev->refcnt) != 0) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                        rtnl_shlock();
-                       rtnl_exlock();
 
                        /* Rebroadcast unregister notification */
                        notifier_call_chain(&netdev_chain,
@@ -2956,7 +2955,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
                                linkwatch_run_queue();
                        }
 
-                       rtnl_exunlock();
                        rtnl_shunlock();
 
                        rebroadcast_time = jiffies;
@@ -3154,10 +3152,6 @@ int unregister_netdevice(struct net_device *dev)
 
        synchronize_net();
 
-#ifdef CONFIG_NET_FASTROUTE
-       dev_clear_fastroute(dev);
-#endif
-
        /* Shutdown queueing discipline. */
        dev_shutdown(dev);
 
@@ -3183,6 +3177,8 @@ int unregister_netdevice(struct net_device *dev)
        /* Finish processing unregister after unlock */
        net_set_todo(dev);
 
+       synchronize_net();
+
        dev_put(dev);
        return 0;
 }
@@ -3251,6 +3247,8 @@ static int __init net_dev_init(void)
 
        BUG_ON(!dev_boot_phase);
 
+       net_random_init();
+
        if (dev_proc_init())
                goto out;
 
@@ -3329,6 +3327,9 @@ EXPORT_SYMBOL(dev_queue_xmit_nit);
 EXPORT_SYMBOL(dev_remove_pack);
 EXPORT_SYMBOL(dev_set_allmulti);
 EXPORT_SYMBOL(dev_set_promiscuity);
+EXPORT_SYMBOL(dev_change_flags);
+EXPORT_SYMBOL(dev_change_name);
+EXPORT_SYMBOL(dev_set_mtu);
 EXPORT_SYMBOL(free_netdev);
 EXPORT_SYMBOL(netdev_boot_setup_check);
 EXPORT_SYMBOL(netdev_set_master);
@@ -3346,10 +3347,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
 #endif
-/* for 801q VLAN support */
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-EXPORT_SYMBOL(dev_change_flags);
-#endif
+
 #ifdef CONFIG_KMOD
 EXPORT_SYMBOL(dev_load);
 #endif
@@ -3359,9 +3357,10 @@ EXPORT_SYMBOL(netdev_fc_xoff);
 EXPORT_SYMBOL(netdev_register_fc);
 EXPORT_SYMBOL(netdev_unregister_fc);
 #endif
-#ifdef CONFIG_NET_FASTROUTE
-EXPORT_SYMBOL(netdev_fastroute);
-EXPORT_SYMBOL(netdev_fastroute_obstacles);
+
+#ifdef CONFIG_NET_CLS_ACT
+EXPORT_SYMBOL(ing_filter);
 #endif
 
+
 EXPORT_PER_CPU_SYMBOL(softnet_data);