fedora core 6 1.2949 + vserver 2.2.0
diff --git a/net/core/dev.c b/net/core/dev.c
index 0301ffc..81a3ae2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7,7 +7,7 @@
  *             2 of the License, or (at your option) any later version.
  *
  *     Derived from the non IP parts of dev.c 1.0.19
- *             Authors:        Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Authors:        Ross Biro
  *                             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *                             Mark Evans, <evansmp@uhura.aston.ac.uk>
  *
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
-#include <linux/config.h>
+#include <linux/bitops.h>
+#include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/mutex.h>
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/socket.h>
@@ -97,7 +98,6 @@
 #include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/if_bridge.h>
-#include <linux/divert.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
 #include <linux/kallsyms.h>
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
-#ifdef CONFIG_NET_RADIO
-#include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
+#include <linux/delay.h>
+#include <linux/wireless.h>
 #include <net/iw_handler.h>
-#endif /* CONFIG_NET_RADIO */
 #include <asm/current.h>
+#include <linux/audit.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/ctype.h>
+#include <linux/vs_context.h> /* remove with NXF_HIDE_NETIF */
 #include <linux/vs_network.h>
 
-/* This define, if set, will randomly drop a packet when congestion
- * is more than moderate.  It helps fairness in the multi-interface
- * case when one of them is a hog, but it kills performance for the
- * single interface case so it is off now by default.
- */
-#undef RAND_LIE
-
-/* Setting this will sample the queue lengths and thus congestion
- * via a timer instead of as each packet is received.
- */
-#undef OFFLINE_SAMPLE
+#ifdef CONFIG_XEN
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#endif
 
 /*
  *     The list of packet types we will receive (as opposed to discard)
  *             sure which should go first, but I bet it won't make much
  *             difference if we are running VLANs.  The good news is that
  *             this protocol won't be in the list unless compiled in, so
- *             the average user (w/out VLANs) will not be adversly affected.
+ *             the average user (w/out VLANs) will not be adversely affected.
  *             --BLG
  *
  *             0800    IP
  *             86DD    IPv6
  */
 
-static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(ptype_lock);
 static struct list_head ptype_base[16];        /* 16 way hashed list */
 static struct list_head ptype_all;             /* Taps */
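The DEFINE_SPINLOCK() conversion above replaces the deprecated SPIN_LOCK_UNLOCKED initializer, which does not cooperate with spinlock debugging and lockdep's per-lock class keys. A minimal sketch of the two idioms (the lock names are illustrative):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);   /* preferred static initialization */

static spinlock_t dynamic_lock;         /* runtime alternative */

static void example_setup(void)
{
        spin_lock_init(&dynamic_lock);
}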
 
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy);
-static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
+#ifdef CONFIG_NET_DMA
+static struct dma_client *net_dma_client;
+static unsigned int net_dma_count;
+static spinlock_t net_dma_event_lock;
 #endif
 
 /*
- * The @dev_base list is protected by @dev_base_lock and the rtln
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
  * Pure readers hold dev_base_lock for reading.
@@ -184,8 +183,8 @@ static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
  * semaphore held.
  */
 struct net_device *dev_base;
-struct net_device **dev_tail = &dev_base;
-rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
+static struct net_device **dev_tail = &dev_base;
+DEFINE_RWLOCK(dev_base_lock);
 
 EXPORT_SYMBOL(dev_base);
 EXPORT_SYMBOL(dev_base_lock);
@@ -209,13 +208,13 @@ static inline struct hlist_head *dev_index_hash(int ifindex)
  *     Our notifier list
  */
 
-static struct notifier_block *netdev_chain;
+static RAW_NOTIFIER_HEAD(netdev_chain);
 
 /*
  *     Device drivers call our routines to queue packets here. We empty the
  *     queue in the local softnet handler.
  */
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
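softnet_data is per-CPU state with no lock of its own; every access in this file happens with local interrupts disabled on the owning CPU. A sketch of that pattern, assuming the 2.6.x __get_cpu_var() API:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

static int local_backlog_len(void)
{
        struct softnet_data *sd;
        unsigned long flags;
        int qlen;

        local_irq_save(flags);          /* pin us to this CPU's instance */
        sd = &__get_cpu_var(softnet_data);
        qlen = sd->input_pkt_queue.qlen;
        local_irq_restore(flags);

        return qlen;
}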
 
 #ifdef CONFIG_SYSFS
 extern int netdev_sysfs_init(void);
@@ -238,7 +237,7 @@ extern void netdev_unregister_sysfs(struct net_device *);
  *     For efficiency
  */
 
-int netdev_nit;
+static int netdev_nit;
 
 /*
  *     Add a protocol ID to the list. Now that the input handler is
@@ -284,10 +283,6 @@ void dev_add_pack(struct packet_type *pt)
        spin_unlock_bh(&ptype_lock);
 }
 
-extern void linkwatch_run_queue(void);
-
-
-
 /**
  *     __dev_remove_pack        - remove packet handler
  *     @pt: packet type declaration
@@ -362,7 +357,7 @@ static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
  *     returns 0 on error and 1 on success.  This is a generic routine to
  *     all netdevices.
  */
-int netdev_boot_setup_add(char *name, struct ifmap *map)
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
 {
        struct netdev_boot_setup *s;
        int i;
@@ -521,35 +516,6 @@ struct net_device *dev_get_by_name(const char *name)
        return dev;
 }
 
-/*
-   Return value is changed to int to prevent illegal usage in future.
-   It is still legal to use to check for device existence.
-
-   User should understand, that the result returned by this function
-   is meaningless, if it was not issued under rtnl semaphore.
- */
-
-/**
- *     dev_get -       test if a device exists
- *     @name:  name to test for
- *
- *     Test if a name exists. Returns true if the name is found. In order
- *     to be sure the name is not allocated or removed during the test the
- *     caller must hold the rtnl semaphore.
- *
- *     This function exists only for back compatibility with older
- *     drivers.
- */
-int __dev_get(const char *name)
-{
-       struct net_device *dev;
-
-       read_lock(&dev_base_lock);
-       dev = __dev_get_by_name(name);
-       read_unlock(&dev_base_lock);
-       return dev != NULL;
-}
-
 /**
  *     __dev_get_by_index - find a device by its ifindex
  *     @ifindex: index of device
@@ -624,26 +590,19 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
        return dev;
 }
 
-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
-{
-       struct net_device *dev;
-
-       for (dev = dev_base; dev; dev = dev->next)
-               if (dev->type == type)
-                       break;
-       return dev;
-}
-
-EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+EXPORT_SYMBOL(dev_getbyhwaddr);
 
 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 {
        struct net_device *dev;
 
        rtnl_lock();
-       dev = __dev_getfirstbyhwtype(type);
-       if (dev)
-               dev_hold(dev);
+       for (dev = dev_base; dev; dev = dev->next) {
+               if (dev->type == type) {
+                       dev_hold(dev);
+                       break;
+               }
+       }
        rtnl_unlock();
        return dev;
 }
@@ -666,32 +625,14 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
        struct net_device *dev;
 
        read_lock(&dev_base_lock);
-       dev = __dev_get_by_flags(if_flags, mask);
-       if (dev)
-               dev_hold(dev);
-       read_unlock(&dev_base_lock);
-       return dev;
-}
-
-/**
- *     __dev_get_by_flags - find any device with given flags
- *     @if_flags: IFF_* values
- *     @mask: bitmask of bits in if_flags to check
- *
- *     Search for any interface with the given flags. Returns NULL if a device
- *     is not found or a pointer to the device. The caller must hold either
- *     the RTNL semaphore or @dev_base_lock.
- */
-
-struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
-{
-       struct net_device *dev;
-
        for (dev = dev_base; dev != NULL; dev = dev->next) {
-               if (((dev->flags ^ if_flags) & mask) == 0)
-                       return dev;
+               if (((dev->flags ^ if_flags) & mask) == 0) {
+                       dev_hold(dev);
+                       break;
+               }
        }
-       return NULL;
+       read_unlock(&dev_base_lock);
+       return dev;
 }
 
 /**
@@ -699,14 +640,24 @@ struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
  *     @name: name string
  *
  *     Network device names need to be valid file names to
- *     to allow sysfs to work
+ *     to allow sysfs to work.  We also disallow any kind of
+ *     whitespace.
  */
 int dev_valid_name(const char *name)
 {
-       return !(*name == '\0' 
-                || !strcmp(name, ".")
-                || !strcmp(name, "..")
-                || strchr(name, '/'));
+       if (*name == '\0')
+               return 0;
+       if (strlen(name) >= IFNAMSIZ)
+               return 0;
+       if (!strcmp(name, ".") || !strcmp(name, ".."))
+               return 0;
+
+       while (*name) {
+               if (*name == '/' || isspace(*name))
+                       return 0;
+               name++;
+       }
+       return 1;
 }
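For illustration, a hypothetical caller of the tightened dev_valid_name(): "veth/0" is now rejected for the slash, "my tap" for the space, and anything of IFNAMSIZ or more characters for length.

#include <linux/netdevice.h>
#include <linux/string.h>

static int demo_pick_name(struct net_device *dev, const char *wanted)
{
        if (!dev_valid_name(wanted))
                return -EINVAL;         /* same errno dev_change_name() uses */
        strlcpy(dev->name, wanted, IFNAMSIZ);
        return 0;
}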
 
 /**
@@ -715,10 +666,12 @@ int dev_valid_name(const char *name)
  *     @name: name format string
  *
  *     Passed a format string - eg "lt%d" it will try and find a suitable
- *     id. Not efficient for many devices, not called a lot. The caller
- *     must hold the dev_base or rtnl lock while allocating the name and
- *     adding the device in order to avoid duplicates. Returns the number
- *     of the unit assigned or a negative errno code.
+ *     id. It scans list of devices to build up a free map, then chooses
+ *     the first empty slot. The caller must hold the dev_base or rtnl lock
+ *     while allocating the name and adding the device in order to avoid
+ *     duplicates.
+ *     Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ *     Returns the number of the unit assigned or a negative errno code.
  */
 
 int dev_alloc_name(struct net_device *dev, const char *name)
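A typical use of the allocator described above, for a hypothetical driver that wants the next free "tap%d" slot; dev_alloc_name() writes the expanded name into dev->name and returns the unit number it chose:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int demo_register_tap(struct net_device *dev)
{
        int unit;

        ASSERT_RTNL();                  /* caller must hold the rtnl lock */
        unit = dev_alloc_name(dev, "tap%d");
        if (unit < 0)
                return unit;            /* negative errno */

        return register_netdevice(dev);
}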
@@ -810,12 +763,25 @@ int dev_change_name(struct net_device *dev, char *newname)
        if (!err) {
                hlist_del(&dev->name_hlist);
                hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
-               notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+               raw_notifier_call_chain(&netdev_chain,
+                               NETDEV_CHANGENAME, dev);
        }
 
        return err;
 }
 
+/**
+ *     netdev_features_change - device changes features
+ *     @dev: device to cause notification
+ *
+ *     Called to indicate a device has changed features.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+       raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+
 /**
  *     netdev_state_change - device changes state
  *     @dev: device to cause notification
@@ -827,7 +793,8 @@ int dev_change_name(struct net_device *dev, char *newname)
 void netdev_state_change(struct net_device *dev)
 {
        if (dev->flags & IFF_UP) {
-               notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+               raw_notifier_call_chain(&netdev_chain,
+                               NETDEV_CHANGE, dev);
                rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
        }
 }
@@ -862,18 +829,6 @@ static int default_rebuild_header(struct sk_buff *skb)
 }
 
 
-/*
- * Some old buggy device drivers change get_stats after registering
- * the device.  Try and trap them here.
- * This can be elimnated when all devices are known fixed.
- */
-static inline int get_stats_changed(struct net_device *dev)
-{
-       int changed = dev->last_stats != dev->get_stats;
-       dev->last_stats = dev->get_stats;
-       return changed;
-}
-
 /**
  *     dev_open        - prepare an interface for use.
  *     @dev:   device to open
@@ -897,14 +852,6 @@ int dev_open(struct net_device *dev)
        if (dev->flags & IFF_UP)
                return 0;
 
-       /*
-        *       Check for broken device drivers.
-        */
-       if (get_stats_changed(dev) && net_ratelimit()) {
-               printk(KERN_ERR "%s: driver changed get_stats after register\n",
-                      dev->name);
-       }
-
        /*
         *      Is it even present?
         */
@@ -921,14 +868,6 @@ int dev_open(struct net_device *dev)
                        clear_bit(__LINK_STATE_START, &dev->state);
        }
 
-       /*
-        *      Check for more broken device drivers.
-        */
-       if (get_stats_changed(dev) && net_ratelimit()) {
-               printk(KERN_ERR "%s: driver changed get_stats in open\n",
-                      dev->name);
-       }
-
        /*
         *      If it went open OK then:
         */
@@ -952,7 +891,7 @@ int dev_open(struct net_device *dev)
                /*
                 *      ... and announce new interface.
                 */
-               notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+               raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
        }
        return ret;
 }
@@ -975,7 +914,7 @@ int dev_close(struct net_device *dev)
         *      Tell people we are going down, so that they can
         *      prepare to death, when device is still operating.
         */
-       notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+       raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 
        dev_deactivate(dev);
 
@@ -990,8 +929,7 @@ int dev_close(struct net_device *dev)
        smp_mb__after_clear_bit(); /* Commit netif_running(). */
        while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
                /* No hurry. */
-               current->state = TASK_INTERRUPTIBLE;
-               schedule_timeout(1);
+               msleep(1);
        }
 
        /*
@@ -1013,7 +951,7 @@ int dev_close(struct net_device *dev)
        /*
         * Tell people we are down
         */
-       notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+       raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 
        return 0;
 }
@@ -1044,7 +982,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
        int err;
 
        rtnl_lock();
-       err = notifier_chain_register(&netdev_chain, nb);
+       err = raw_notifier_chain_register(&netdev_chain, nb);
        if (!err) {
                for (dev = dev_base; dev; dev = dev->next) {
                        nb->notifier_call(nb, NETDEV_REGISTER, dev);
@@ -1069,7 +1007,12 @@ int register_netdevice_notifier(struct notifier_block *nb)
 
 int unregister_netdevice_notifier(struct notifier_block *nb)
 {
-       return notifier_chain_unregister(&netdev_chain, nb);
+       int err;
+
+       rtnl_lock();
+       err = raw_notifier_chain_unregister(&netdev_chain, nb);
+       rtnl_unlock();
+       return err;
 }
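For reference, a minimal client of the converted chain; registration (above) replays NETDEV_REGISTER for devices already present, and unregistration now takes the rtnl lock itself, so callers must not hold it:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int demo_netdev_event(struct notifier_block *nb,
                             unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        if (event == NETDEV_UP)
                printk(KERN_INFO "demo: %s is up\n", dev->name);
        return NOTIFY_DONE;
}

static struct notifier_block demo_nb = {
        .notifier_call = demo_netdev_event,
};

/* module init:  register_netdevice_notifier(&demo_nb);
 * module exit:  unregister_netdevice_notifier(&demo_nb);
 */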
 
 /**
@@ -1078,12 +1021,44 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
  *      @v:   pointer passed unmodified to notifier function
  *
  *     Call all network notifier blocks.  Parameters and return value
- *     are as for notifier_call_chain().
+ *     are as for raw_notifier_call_chain().
  */
 
 int call_netdevice_notifiers(unsigned long val, void *v)
 {
-       return notifier_call_chain(&netdev_chain, val, v);
+       return raw_notifier_call_chain(&netdev_chain, val, v);
+}
+
+/* When > 0 there are consumers of rx skb time stamps */
+static atomic_t netstamp_needed = ATOMIC_INIT(0);
+
+void net_enable_timestamp(void)
+{
+       atomic_inc(&netstamp_needed);
+}
+
+void net_disable_timestamp(void)
+{
+       atomic_dec(&netstamp_needed);
+}
+
+void __net_timestamp(struct sk_buff *skb)
+{
+       struct timeval tv;
+
+       do_gettimeofday(&tv);
+       skb_set_timestamp(skb, &tv);
+}
+EXPORT_SYMBOL(__net_timestamp);
+
+static inline void net_timestamp(struct sk_buff *skb)
+{
+       if (atomic_read(&netstamp_needed))
+               __net_timestamp(skb);
+       else {
+               skb->tstamp.off_sec = 0;
+               skb->tstamp.off_usec = 0;
+       }
 }
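A sketch of how the timestamp counters above are meant to be used, in the style of the packet taps: hold a reference while any consumer of rx timestamps exists, and read the stamp off the skb with the 2.6.18-era skb_get_timestamp() accessor:

#include <linux/skbuff.h>

static void demo_tap_open(void)
{
        net_enable_timestamp();
}

static void demo_tap_close(void)
{
        net_disable_timestamp();
}

static void demo_tap_rcv(struct sk_buff *skb)
{
        struct timeval tv;

        skb_get_timestamp(skb, &tv);    /* zeroed when stamping is off */
}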
 
 /*
@@ -1091,10 +1066,11 @@ int call_netdevice_notifiers(unsigned long val, void *v)
  *     taps currently in use.
  */
 
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
        struct packet_type *ptype;
-       net_timestamp(&skb->stamp);
+
+       net_timestamp(skb);
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1125,55 +1101,179 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
                        skb2->h.raw = skb2->nh.raw;
                        skb2->pkt_type = PACKET_OUTGOING;
-                       ptype->func(skb2, skb->dev, ptype);
+                       ptype->func(skb2, skb->dev, ptype, skb->dev);
                }
        }
        rcu_read_unlock();
 }
 
+
+void __netif_schedule(struct net_device *dev)
+{
+       if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+               unsigned long flags;
+               struct softnet_data *sd;
+
+               local_irq_save(flags);
+               sd = &__get_cpu_var(softnet_data);
+               dev->next_sched = sd->output_queue;
+               sd->output_queue = dev;
+               raise_softirq_irqoff(NET_TX_SOFTIRQ);
+               local_irq_restore(flags);
+       }
+}
+EXPORT_SYMBOL(__netif_schedule);
+
+void __netif_rx_schedule(struct net_device *dev)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       dev_hold(dev);
+       list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
+       if (dev->quota < 0)
+               dev->quota += dev->weight;
+       else
+               dev->quota = dev->weight;
+       __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__netif_rx_schedule);
+
+void dev_kfree_skb_any(struct sk_buff *skb)
+{
+       if (in_irq() || irqs_disabled())
+               dev_kfree_skb_irq(skb);
+       else
+               dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(dev_kfree_skb_any);
+
+
+/* Hot-plugging. */
+void netif_device_detach(struct net_device *dev)
+{
+       if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+           netif_running(dev)) {
+               netif_stop_queue(dev);
+       }
+}
+EXPORT_SYMBOL(netif_device_detach);
+
+void netif_device_attach(struct net_device *dev)
+{
+       if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+           netif_running(dev)) {
+               netif_wake_queue(dev);
+               __netdev_watchdog_up(dev);
+       }
+}
+EXPORT_SYMBOL(netif_device_attach);
+
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
  */
-int skb_checksum_help(struct sk_buff **pskb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
 {
-       unsigned int csum;
-       int ret = 0, offset = (*pskb)->h.raw - (*pskb)->data;
+       __wsum csum;
+       int ret = 0, offset = skb->h.raw - skb->data;
 
-       if (inward) {
-               (*pskb)->ip_summed = CHECKSUM_NONE;
-               goto out;
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               goto out_set_summed;
+
+       if (unlikely(skb_shinfo(skb)->gso_size)) {
+               /* Let GSO fix up the checksum. */
+               goto out_set_summed;
        }
 
-       if (skb_shared(*pskb)  || skb_cloned(*pskb)) {
-               struct sk_buff *newskb = skb_copy(*pskb, GFP_ATOMIC);
-               if (!newskb) {
-                       ret = -ENOMEM;
+       if (skb_cloned(skb)) {
+               ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+               if (ret)
                        goto out;
-               }
-               if ((*pskb)->sk)
-                       skb_set_owner_w(newskb, (*pskb)->sk);
-               kfree_skb(*pskb);
-               *pskb = newskb;
        }
 
-       if (offset > (int)(*pskb)->len)
-               BUG();
-       csum = skb_checksum(*pskb, offset, (*pskb)->len-offset, 0);
+       BUG_ON(offset > (int)skb->len);
+       csum = skb_checksum(skb, offset, skb->len-offset, 0);
+
+       offset = skb->tail - skb->h.raw;
+       BUG_ON(offset <= 0);
+       BUG_ON(skb->csum_offset + 2 > offset);
 
-       offset = (*pskb)->tail - (*pskb)->h.raw;
-       if (offset <= 0)
-               BUG();
-       if ((*pskb)->csum + 2 > offset)
-               BUG();
+       *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
 
-       *(u16*)((*pskb)->h.raw + (*pskb)->csum) = csum_fold(csum);
-       (*pskb)->ip_summed = CHECKSUM_NONE;
+out_set_summed:
+       skb->ip_summed = CHECKSUM_NONE;
 out:   
        return ret;
 }
 
-#ifdef CONFIG_HIGHMEM
+/**
+ *     skb_gso_segment - Perform segmentation on skb.
+ *     @skb: buffer to segment
+ *     @features: features for the output path (see dev->features)
+ *
+ *     This function segments the given skb and returns a list of segments.
+ *
+ *     It may return NULL if the skb requires no segmentation.  This is
+ *     only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       int err;
+
+       BUG_ON(skb_shinfo(skb)->frag_list);
+
+       skb->mac.raw = skb->data;
+       skb->mac_len = skb->nh.raw - skb->data;
+       __skb_pull(skb, skb->mac_len);
+
+       if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+               if (skb_header_cloned(skb) &&
+                   (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+                       return ERR_PTR(err);
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+               if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+                       if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+                               err = ptype->gso_send_check(skb);
+                               segs = ERR_PTR(err);
+                               if (err || skb_gso_ok(skb, features))
+                                       break;
+                               __skb_push(skb, skb->data - skb->nh.raw);
+                       }
+                       segs = ptype->gso_segment(skb, features);
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       __skb_push(skb, skb->data - skb->mac.raw);
+
+       return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+       if (net_ratelimit()) {
+               printk(KERN_ERR "%s: hw csum failure.\n", 
+                       dev ? dev->name : "<unknown>");
+               dump_stack();
+       }
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
 /* Actually, we should eliminate this check as soon as we know, that:
  * 1. IOMMU is present and allows to map all the memory.
  * 2. No high memory really exists on this machine.
@@ -1181,101 +1281,161 @@ out:
 
 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
+#ifdef CONFIG_HIGHMEM
        int i;
 
        if (dev->features & NETIF_F_HIGHDMA)
                return 0;
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-               if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
+               if (PageHighMem(skb_shinfo(skb)->frags[i].page))
                        return 1;
 
+#endif
        return 0;
 }
-#else
-#define illegal_highdma(dev, skb)      (0)
-#endif
 
-extern void skb_release_data(struct sk_buff *);
+struct dev_gso_cb {
+       void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
 
-/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+static void dev_gso_skb_destructor(struct sk_buff *skb)
 {
-       unsigned int size;
-       u8 *data;
-       long offset;
-       struct skb_shared_info *ninfo;
-       int headerlen = skb->data - skb->head;
-       int expand = (skb->tail + skb->data_len) - skb->end;
+       struct dev_gso_cb *cb;
 
-       if (skb_shared(skb))
-               BUG();
+       do {
+               struct sk_buff *nskb = skb->next;
 
-       if (expand <= 0)
-               expand = 0;
+               skb->next = nskb->next;
+               nskb->next = NULL;
+               kfree_skb(nskb);
+       } while (skb->next);
 
-       size = skb->end - skb->head + expand;
-       size = SKB_DATA_ALIGN(size);
-       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-       if (!data)
-               return -ENOMEM;
+       cb = DEV_GSO_CB(skb);
+       if (cb->destructor)
+               cb->destructor(skb);
+}
 
-       /* Copy entire thing */
-       if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
-               BUG();
+/**
+ *     dev_gso_segment - Perform emulated hardware segmentation on skb.
+ *     @skb: buffer to segment
+ *
+ *     This function segments the given skb and stores the list of segments
+ *     in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       struct sk_buff *segs;
+       int features = dev->features & ~(illegal_highdma(dev, skb) ?
+                                        NETIF_F_SG : 0);
+
+       segs = skb_gso_segment(skb, features);
+
+       /* Verifying header integrity only. */
+       if (!segs)
+               return 0;
+
+       if (unlikely(IS_ERR(segs)))
+               return PTR_ERR(segs);
 
-       /* Set up shinfo */
-       ninfo = (struct skb_shared_info*)(data + size);
-       atomic_set(&ninfo->dataref, 1);
-       ninfo->tso_size = skb_shinfo(skb)->tso_size;
-       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
-       ninfo->nr_frags = 0;
-       ninfo->frag_list = NULL;
+       skb->next = segs;
+       DEV_GSO_CB(skb)->destructor = skb->destructor;
+       skb->destructor = dev_gso_skb_destructor;
+
+       return 0;
+}
 
-       /* Offset between the two in bytes */
-       offset = data - skb->head;
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       if (likely(!skb->next)) {
+               if (netdev_nit)
+                       dev_queue_xmit_nit(skb, dev);
 
-       /* Free old data. */
-       skb_release_data(skb);
+               if (netif_needs_gso(dev, skb)) {
+                       if (unlikely(dev_gso_segment(skb)))
+                               goto out_kfree_skb;
+                       if (skb->next)
+                               goto gso;
+               }
 
-       skb->head = data;
-       skb->end  = data + size;
+               return dev->hard_start_xmit(skb, dev);
+       }
 
-       /* Set up new pointers */
-       skb->h.raw   += offset;
-       skb->nh.raw  += offset;
-       skb->mac.raw += offset;
-       skb->tail    += offset;
-       skb->data    += offset;
+gso:
+       do {
+               struct sk_buff *nskb = skb->next;
+               int rc;
 
-       /* We are no longer a clone, even if we were. */
-       skb->cloned    = 0;
+               skb->next = nskb->next;
+               nskb->next = NULL;
+               rc = dev->hard_start_xmit(nskb, dev);
+               if (unlikely(rc)) {
+                       nskb->next = skb->next;
+                       skb->next = nskb;
+                       return rc;
+               }
+               if (unlikely(netif_queue_stopped(dev) && skb->next))
+                       return NETDEV_TX_BUSY;
+       } while (skb->next);
+       
+       skb->destructor = DEV_GSO_CB(skb)->destructor;
 
-       skb->tail     += skb->data_len;
-       skb->data_len  = 0;
+out_kfree_skb:
+       kfree_skb(skb);
        return 0;
 }
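The re-chaining above relies on the driver contract: returning NETDEV_TX_BUSY means the skb was not consumed and may be resubmitted. A toy ->hard_start_xmit() illustrating that contract (demo_priv and its free-slot accounting are hypothetical):

struct demo_priv {
        int tx_free;                    /* hypothetical free-slot count */
};

static int demo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct demo_priv *priv = netdev_priv(dev);

        if (priv->tx_free == 0) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;  /* skb NOT consumed; will be retried */
        }

        priv->tx_free--;
        dev_kfree_skb(skb);             /* stand-in for handing off to hw */
        return NETDEV_TX_OK;
}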
 
-#define HARD_TX_LOCK_BH(dev, cpu) {                    \
+#define HARD_TX_LOCK(dev, cpu) {                       \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               spin_lock_bh(&dev->xmit_lock);          \
-               dev->xmit_lock_owner = cpu;             \
+               netif_tx_lock(dev);                     \
        }                                               \
 }
 
-#define HARD_TX_UNLOCK_BH(dev) {                       \
+#define HARD_TX_UNLOCK(dev) {                          \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               dev->xmit_lock_owner = -1;              \
-               spin_unlock_bh(&dev->xmit_lock);        \
+               netif_tx_unlock(dev);                   \
        }                                               \
 }
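The same serialization is available to drivers directly; a short sketch of using the netif_tx_lock() API that HARD_TX_LOCK now wraps for non-LLTX devices:

static void demo_reset_tx_ring(struct net_device *dev)
{
        netif_tx_lock(dev);             /* excludes concurrent hard_start_xmit */
        /* ... reinitialize the transmit ring here ... */
        netif_tx_unlock(dev);
}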
 
-static inline void qdisc_run(struct net_device *dev)
+#ifdef CONFIG_XEN
+inline int skb_checksum_setup(struct sk_buff *skb)
 {
-       while (!netif_queue_stopped(dev) &&
-              qdisc_restart(dev)<0)
-               /* NOTHING */;
+       if (skb->proto_csum_blank) {
+               if (skb->protocol != htons(ETH_P_IP))
+                       goto out;
+               skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+               if (skb->h.raw >= skb->tail)
+                       goto out;
+               switch (skb->nh.iph->protocol) {
+               case IPPROTO_TCP:
+                       skb->csum = offsetof(struct tcphdr, check);
+                       break;
+               case IPPROTO_UDP:
+                       skb->csum = offsetof(struct udphdr, check);
+                       break;
+               default:
+                       if (net_ratelimit())
+                               printk(KERN_ERR "Attempting to checksum a non-"
+                                      "TCP/UDP packet, dropping a protocol"
+                                      " %d packet", skb->nh.iph->protocol);
+                       goto out;
+               }
+               if ((skb->h.raw + skb->csum + 2) > skb->tail)
+                       goto out;
+               skb->ip_summed = CHECKSUM_PARTIAL;
+               skb->proto_csum_blank = 0;
+       }
+       return 0;
+out:
+       return -EPROTO;
 }
+#else
+inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+#endif
+
 
 /**
  *     dev_queue_xmit - transmit a buffer
@@ -1288,6 +1448,19 @@ static inline void qdisc_run(struct net_device *dev)
  *     A negative errno code is returned on a failure. A success does not
  *     guarantee the frame will be transmitted as it may be dropped due
  *     to congestion or traffic shaping.
+ *
+ * -----------------------------------------------------------------------------------
+ *      I notice this method can also return errors from the queue disciplines,
+ *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
+ *      be positive.
+ *
+ *      Regardless of the return value, the skb is consumed, so it is currently
+ *      difficult to retry a send to this method.  (You can bump the ref count
+ *      before sending to hold a reference for retry if you are careful.)
+ *
+ *      When calling this method, interrupts MUST be enabled.  This is because
+ *      the BH enable code must have IRQs enabled so that it will not deadlock.
+ *          --BLG
  */
 
 int dev_queue_xmit(struct sk_buff *skb)
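Given the return-value caveats in the comment above, a cautious caller might look like this sketch (the function name is illustrative): positive NET_XMIT_* codes are soft failures, and the skb is consumed on every path.

#include <linux/netdevice.h>

static void demo_send(struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);   /* consumes skb, success or not */

        if (rc < 0)
                printk(KERN_DEBUG "demo: xmit error %d\n", rc);
        else if (rc != NET_XMIT_SUCCESS)
                printk(KERN_DEBUG "demo: queued, congestion code %d\n", rc);
        /* do not touch skb here in either case */
}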
@@ -1296,9 +1469,19 @@ int dev_queue_xmit(struct sk_buff *skb)
        struct Qdisc *q;
        int rc = -ENOMEM;
 
+       /* If a checksum-deferred packet is forwarded to a device that needs a
+        * checksum, correct the pointers and force checksumming.
+        */
+       if (skb_checksum_setup(skb))
+               goto out_kfree_skb;
+
+       /* GSO will handle the following emulations directly. */
+       if (netif_needs_gso(dev, skb))
+               goto gso;
+
        if (skb_shinfo(skb)->frag_list &&
            !(dev->features & NETIF_F_FRAGLIST) &&
-           __skb_linearize(skb, GFP_ATOMIC))
+           __skb_linearize(skb))
                goto out_kfree_skb;
 
        /* Fragmented skb is linearized if device does not support SG,
@@ -1307,20 +1490,27 @@ int dev_queue_xmit(struct sk_buff *skb)
         */
        if (skb_shinfo(skb)->nr_frags &&
            (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-           __skb_linearize(skb, GFP_ATOMIC))
+           __skb_linearize(skb))
                goto out_kfree_skb;
 
        /* If packet is not checksummed and device does not support
         * checksumming for this protocol, complete checksumming here.
         */
-       if (skb->ip_summed == CHECKSUM_HW &&
-           (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+           (!(dev->features & NETIF_F_GEN_CSUM) &&
             (!(dev->features & NETIF_F_IP_CSUM) ||
              skb->protocol != htons(ETH_P_IP))))
-               if (skb_checksum_help(&skb, 0))
+               if (skb_checksum_help(skb))
                        goto out_kfree_skb;
 
-       rcu_read_lock();
+gso:
+       spin_lock_prefetch(&dev->queue_lock);
+
+       /* Disable soft irqs for various locks below. Also 
+        * stops preemption for RCU. 
+        */
+       rcu_read_lock_bh(); 
+
        /* Updates of qdisc are serialized by queue_lock. 
         * The struct Qdisc which is pointed to by qdisc is now a 
         * rcu structure - it may be accessed without acquiring 
@@ -1333,31 +1523,30 @@ int dev_queue_xmit(struct sk_buff *skb)
         * also serializes access to the device queue.
         */
 
-       q = dev->qdisc;
-       smp_read_barrier_depends();
+       q = rcu_dereference(dev->qdisc);
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
        if (q->enqueue) {
                /* Grab device queue */
-               spin_lock_bh(&dev->queue_lock);
-
-               rc = q->enqueue(skb, q);
-
-               qdisc_run(dev);
-
-               spin_unlock_bh(&dev->queue_lock);
-               rcu_read_unlock();
-               rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-               goto out;
+               spin_lock(&dev->queue_lock);
+               q = dev->qdisc;
+               if (q->enqueue) {
+                       rc = q->enqueue(skb, q);
+                       qdisc_run(dev);
+                       spin_unlock(&dev->queue_lock);
+
+                       rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+                       goto out;
+               }
+               spin_unlock(&dev->queue_lock);
        }
-       rcu_read_unlock();
 
        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...
 
-          Really, it is unlikely that xmit_lock protection is necessary here.
-          (f.e. loopback and IP tunnels are clean ignoring statistics
+          Really, it is unlikely that netif_tx_lock protection is necessary
+          here.  (f.e. loopback and IP tunnels are clean ignoring statistics
           counters.)
           However, it is possible, that they rely on protection
           made by us here.
@@ -1366,30 +1555,24 @@ int dev_queue_xmit(struct sk_buff *skb)
           Either shot noqueue qdisc, it is even simpler 8)
         */
        if (dev->flags & IFF_UP) {
-               int cpu = get_cpu();
+               int cpu = smp_processor_id(); /* ok because BHs are off */
 
                if (dev->xmit_lock_owner != cpu) {
 
-                       HARD_TX_LOCK_BH(dev, cpu);
-                       put_cpu();
+                       HARD_TX_LOCK(dev, cpu);
 
                        if (!netif_queue_stopped(dev)) {
-                               if (netdev_nit)
-                                       dev_queue_xmit_nit(skb, dev);
-
                                rc = 0;
-                               if (!dev->hard_start_xmit(skb, dev)) {
-                                       HARD_TX_UNLOCK_BH(dev);
+                               if (!dev_hard_start_xmit(skb, dev)) {
+                                       HARD_TX_UNLOCK(dev);
                                        goto out;
                                }
                        }
-                       HARD_TX_UNLOCK_BH(dev);
+                       HARD_TX_UNLOCK(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
-                       goto out_enetdown;
                } else {
-                       put_cpu();
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
@@ -1397,11 +1580,15 @@ int dev_queue_xmit(struct sk_buff *skb)
                                       "%s, fix it urgently!\n", dev->name);
                }
        }
-out_enetdown:
+
        rc = -ENETDOWN;
+       rcu_read_unlock_bh();
+
 out_kfree_skb:
        kfree_skb(skb);
+       return rc;
 out:
+       rcu_read_unlock_bh();
        return rc;
 }
 
@@ -1410,131 +1597,13 @@ out:
                        Receiver routines
   =======================================================================*/
 
-int netdev_max_backlog = 300;
+int netdev_max_backlog = 1000;
+int netdev_budget = 300;
 int weight_p = 64;            /* old backlog weight */
-/* These numbers are selected based on intuition and some
- * experimentatiom, if you have more scientific way of doing this
- * please go ahead and fix things.
- */
-int no_cong_thresh = 10;
-int no_cong = 20;
-int lo_cong = 100;
-int mod_cong = 290;
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-atomic_t netdev_dropping = ATOMIC_INIT(0);
-static unsigned long netdev_fc_mask = 1;
-unsigned long netdev_fc_xoff;
-spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
-
-static struct
-{
-       void (*stimul)(struct net_device *);
-       struct net_device *dev;
-} netdev_fc_slots[BITS_PER_LONG];
-
-int netdev_register_fc(struct net_device *dev,
-                      void (*stimul)(struct net_device *dev))
-{
-       int bit = 0;
-       unsigned long flags;
-
-       spin_lock_irqsave(&netdev_fc_lock, flags);
-       if (netdev_fc_mask != ~0UL) {
-               bit = ffz(netdev_fc_mask);
-               netdev_fc_slots[bit].stimul = stimul;
-               netdev_fc_slots[bit].dev = dev;
-               set_bit(bit, &netdev_fc_mask);
-               clear_bit(bit, &netdev_fc_xoff);
-       }
-       spin_unlock_irqrestore(&netdev_fc_lock, flags);
-       return bit;
-}
-
-void netdev_unregister_fc(int bit)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&netdev_fc_lock, flags);
-       if (bit > 0) {
-               netdev_fc_slots[bit].stimul = NULL;
-               netdev_fc_slots[bit].dev = NULL;
-               clear_bit(bit, &netdev_fc_mask);
-               clear_bit(bit, &netdev_fc_xoff);
-       }
-       spin_unlock_irqrestore(&netdev_fc_lock, flags);
-}
-
-static void netdev_wakeup(void)
-{
-       unsigned long xoff;
-
-       spin_lock(&netdev_fc_lock);
-       xoff = netdev_fc_xoff;
-       netdev_fc_xoff = 0;
-       while (xoff) {
-               int i = ffz(~xoff);
-               xoff &= ~(1 << i);
-               netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
-       }
-       spin_unlock(&netdev_fc_lock);
-}
-#endif
-
-static void get_sample_stats(int cpu)
-{
-#ifdef RAND_LIE
-       unsigned long rd;
-       int rq;
-#endif
-       struct softnet_data *sd = &per_cpu(softnet_data, cpu);
-       int blog = sd->input_pkt_queue.qlen;
-       int avg_blog = sd->avg_blog;
-
-       avg_blog = (avg_blog >> 1) + (blog >> 1);
-
-       if (avg_blog > mod_cong) {
-               /* Above moderate congestion levels. */
-               sd->cng_level = NET_RX_CN_HIGH;
-#ifdef RAND_LIE
-               rd = net_random();
-               rq = rd % netdev_max_backlog;
-               if (rq < avg_blog) /* unlucky bastard */
-                       sd->cng_level = NET_RX_DROP;
-#endif
-       } else if (avg_blog > lo_cong) {
-               sd->cng_level = NET_RX_CN_MOD;
-#ifdef RAND_LIE
-               rd = net_random();
-               rq = rd % netdev_max_backlog;
-                       if (rq < avg_blog) /* unlucky bastard */
-                               sd->cng_level = NET_RX_CN_HIGH;
-#endif
-       } else if (avg_blog > no_cong)
-               sd->cng_level = NET_RX_CN_LOW;
-       else  /* no congestion */
-               sd->cng_level = NET_RX_SUCCESS;
-
-       sd->avg_blog = avg_blog;
-}
-
-#ifdef OFFLINE_SAMPLE
-static void sample_queue(unsigned long dummy)
-{
-/* 10 ms 0r 1ms -- i don't care -- JHS */
-       int next_tick = 1;
-       int cpu = smp_processor_id();
-
-       get_sample_stats(cpu);
-       next_tick += jiffies;
-       mod_timer(&samp_timer, next_tick);
-}
-#endif
-
-
 /**
  *     netif_rx        -       post buffer to the network code
  *     @skb: buffer to post
@@ -1555,65 +1624,37 @@ static void sample_queue(unsigned long dummy)
 
 int netif_rx(struct sk_buff *skb)
 {
-       int this_cpu;
        struct softnet_data *queue;
        unsigned long flags;
 
-#ifdef CONFIG_NETPOLL_RX
-       if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
-               kfree_skb(skb);
+       /* if netpoll wants it, pretend we never saw it */
+       if (netpoll_rx(skb))
                return NET_RX_DROP;
-       }
-#endif
-       
-       if (!skb->stamp.tv_sec)
-               net_timestamp(&skb->stamp);
+
+       if (!skb->tstamp.off_sec)
+               net_timestamp(skb);
 
        /*
         * The code is rearranged so that the path is the most
         * short when CPU is congested, but is still operating.
         */
        local_irq_save(flags);
-       this_cpu = smp_processor_id();
        queue = &__get_cpu_var(softnet_data);
 
        __get_cpu_var(netdev_rx_stat).total++;
        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                if (queue->input_pkt_queue.qlen) {
-                       if (queue->throttle)
-                               goto drop;
-
 enqueue:
                        dev_hold(skb->dev);
                        __skb_queue_tail(&queue->input_pkt_queue, skb);
-#ifndef OFFLINE_SAMPLE
-                       get_sample_stats(this_cpu);
-#endif
                        local_irq_restore(flags);
-                       return queue->cng_level;
-               }
-
-               if (queue->throttle) {
-                       queue->throttle = 0;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-                       if (atomic_dec_and_test(&netdev_dropping))
-                               netdev_wakeup();
-#endif
+                       return NET_RX_SUCCESS;
                }
 
                netif_rx_schedule(&queue->backlog_dev);
                goto enqueue;
        }
 
-       if (!queue->throttle) {
-               queue->throttle = 1;
-               __get_cpu_var(netdev_rx_stat).throttled++;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-               atomic_inc(&netdev_dropping);
-#endif
-       }
-
-drop:
        __get_cpu_var(netdev_rx_stat).dropped++;
        local_irq_restore(flags);
 
@@ -1621,14 +1662,34 @@ drop:
        return NET_RX_DROP;
 }
 
-static __inline__ void skb_bond(struct sk_buff *skb)
+int netif_rx_ni(struct sk_buff *skb)
+{
+       int err;
+
+       preempt_disable();
+       err = netif_rx(skb);
+       if (local_softirq_pending())
+               do_softirq();
+       preempt_enable();
+
+       return err;
+}
+
+EXPORT_SYMBOL(netif_rx_ni);
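The split between the two entry points, as a sketch: netif_rx() is safe from hard-interrupt context, while netif_rx_ni() is for process context, where it makes sure the softirq it raised actually gets to run:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static void demo_rx_from_irq(struct net_device *dev, struct sk_buff *skb)
{
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx(skb);                  /* hard-irq context is fine */
}

static void demo_rx_from_task(struct net_device *dev, struct sk_buff *skb)
{
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx_ni(skb);               /* process context; runs softirqs */
}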
+
+static inline struct net_device *skb_bond(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
 
        if (dev->master) {
-               skb->real_dev = skb->dev;
+               if (skb_bond_should_drop(skb)) {
+                       kfree_skb(skb);
+                       return NULL;
+               }
                skb->dev = dev->master;
        }
+
+       return dev;
 }
 
 static void net_tx_action(struct softirq_action *h)
@@ -1678,43 +1739,40 @@ static void net_tx_action(struct softirq_action *h)
 }
 
 static __inline__ int deliver_skb(struct sk_buff *skb,
-                                 struct packet_type *pt_prev, int last)
+                                 struct packet_type *pt_prev,
+                                 struct net_device *orig_dev)
 {
        atomic_inc(&skb->users);
-       return pt_prev->func(skb, skb->dev, pt_prev);
+       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct sk_buff *skb);
+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+struct net_bridge;
+struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
+                                               unsigned char *addr);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
 
-static __inline__ int handle_bridge(struct sk_buff *skb,
-                                    struct packet_type *pt_prev)
+static __inline__ int handle_bridge(struct sk_buff **pskb,
+                                   struct packet_type **pt_prev, int *ret,
+                                   struct net_device *orig_dev)
 {
-       int ret = NET_RX_DROP;
-       if (pt_prev)
-               ret = deliver_skb(skb, pt_prev, 0);
+       struct net_bridge_port *port;
 
-       return ret;
-}
-
-#endif
-
-static inline int __handle_bridge(struct sk_buff *skb,
-                       struct packet_type **pt_prev, int *ret)
-{
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-       if (skb->dev->br_port && skb->pkt_type != PACKET_LOOPBACK) {
-               *ret = handle_bridge(skb, *pt_prev);
-               if (br_handle_frame_hook(skb) == 0)
-                       return 1;
+       if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
+           (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
+               return 0;
 
+       if (*pt_prev) {
+               *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
                *pt_prev = NULL;
-       }
-#endif
-       return 0;
+       } 
+       
+       return br_handle_frame_hook(port, pskb);
 }
-
+#else
+#define handle_bridge(skb, pt_prev, ret, orig_dev)     (0)
+#endif
 
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
@@ -1725,7 +1783,7 @@ static inline int __handle_bridge(struct sk_buff *skb,
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-int ing_filter(struct sk_buff *skb) 
+static int ing_filter(struct sk_buff *skb) 
 {
        struct Qdisc *q;
        struct net_device *dev = skb->dev;
@@ -1734,22 +1792,19 @@ int ing_filter(struct sk_buff *skb)
        if (dev->qdisc_ingress) {
                __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
                if (MAX_RED_LOOP < ttl++) {
-                       printk("Redir loop detected Dropping packet (%s->%s)\n",
-                               skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+                       printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
+                               skb->iif, skb->dev->ifindex);
                        return TC_ACT_SHOT;
                }
 
                skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
 
                skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
-               if (NULL == skb->input_dev) {
-                       skb->input_dev = skb->dev;
-                       printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
-               }
-               spin_lock(&dev->ingress_lock);
+
+               spin_lock(&dev->queue_lock);
                if ((q = dev->qdisc_ingress) != NULL)
                        result = q->enqueue(skb, q);
-               spin_unlock(&dev->ingress_lock);
+               spin_unlock(&dev->queue_lock);
 
        }
 
@@ -1760,20 +1815,24 @@ int ing_filter(struct sk_buff *skb)
 int netif_receive_skb(struct sk_buff *skb)
 {
        struct packet_type *ptype, *pt_prev;
+       struct net_device *orig_dev;
        int ret = NET_RX_DROP;
-       unsigned short type;
+       __be16 type;
 
-#ifdef CONFIG_NETPOLL_RX
-       if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
-               kfree_skb(skb);
+       /* if we've gotten here through NAPI, check netpoll */
+       if (skb->dev->poll && netpoll_rx(skb))
                return NET_RX_DROP;
-       }
-#endif
 
-       if (!skb->stamp.tv_sec)
-               net_timestamp(&skb->stamp);
+       if (!skb->tstamp.off_sec)
+               net_timestamp(skb);
 
-       skb_bond(skb);
+       if (!skb->iif)
+               skb->iif = skb->dev->ifindex;
+
+       orig_dev = skb_bond(skb);
+
+       if (!orig_dev)
+               return NET_RX_DROP;
 
        __get_cpu_var(netdev_rx_stat).total++;
 
@@ -1781,27 +1840,40 @@ int netif_receive_skb(struct sk_buff *skb)
        skb->mac_len = skb->nh.raw - skb->mac.raw;
 
        pt_prev = NULL;
+
+       rcu_read_lock();
+
 #ifdef CONFIG_NET_CLS_ACT
        if (skb->tc_verd & TC_NCLS) {
                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
-               rcu_read_lock();
                goto ncls;
        }
- #endif
+#endif
+
+#ifdef CONFIG_XEN
+       switch (skb->ip_summed) {
+       case CHECKSUM_UNNECESSARY:
+               skb->proto_data_valid = 1;
+               break;
+       case CHECKSUM_PARTIAL:
+               /* XXX Implement me. */
+       default:
+               skb->proto_data_valid = 0;
+               break;
+       }
+#endif
 
-       rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev, 0);
+                               ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }
 
 #ifdef CONFIG_NET_CLS_ACT
        if (pt_prev) {
-               atomic_inc(&skb->users);
-               ret = pt_prev->func(skb, skb->dev, pt_prev);
+               ret = deliver_skb(skb, pt_prev, orig_dev);
                pt_prev = NULL; /* noone else should process this after*/
        } else {
                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1818,9 +1890,7 @@ int netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-       handle_diverter(skb);
-
-       if (__handle_bridge(skb, &pt_prev, &ret))
+       if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
                goto out;
 
        type = skb->protocol;
@@ -1828,13 +1898,13 @@ ncls:
                if (ptype->type == type &&
                    (!ptype->dev || ptype->dev == skb->dev)) {
                        if (pt_prev) 
-                               ret = deliver_skb(skb, pt_prev, 0);
+                               ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }
 
        if (pt_prev) {
-               ret = pt_prev->func(skb, skb->dev, pt_prev);
+               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not able to escape explaining
@@ -1855,6 +1925,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;
 
+       backlog_dev->weight = weight_p;
        for (;;) {
                struct sk_buff *skb;
                struct net_device *dev;
@@ -1876,16 +1947,6 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
                if (work >= quota || jiffies - start_time > 1)
                        break;
 
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-               if (queue->throttle &&
-                   queue->input_pkt_queue.qlen < no_cong_thresh ) {
-                       queue->throttle = 0;
-                       if (atomic_dec_and_test(&netdev_dropping)) {
-                               netdev_wakeup();
-                               break;
-                       }
-               }
-#endif
        }
 
        backlog_dev->quota -= work;
@@ -1900,13 +1961,6 @@ job_done:
        smp_mb__before_clear_bit();
        netif_poll_enable(backlog_dev);
 
-       if (queue->throttle) {
-               queue->throttle = 0;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-               if (atomic_dec_and_test(&netdev_dropping))
-                       netdev_wakeup();
-#endif
-       }
        local_irq_enable();
        return 0;
 }
@@ -1915,9 +1969,9 @@ static void net_rx_action(struct softirq_action *h)
 {
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;
-       int budget = netdev_max_backlog;
+       int budget = netdev_budget;
+       void *have;
 
-       
        local_irq_disable();
 
        while (!list_empty(&queue->poll_list)) {
@@ -1930,21 +1984,36 @@ static void net_rx_action(struct softirq_action *h)
 
                dev = list_entry(queue->poll_list.next,
                                 struct net_device, poll_list);
+               have = netpoll_poll_lock(dev);
 
                if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+                       netpoll_poll_unlock(have);
                        local_irq_disable();
-                       list_del(&dev->poll_list);
-                       list_add_tail(&dev->poll_list, &queue->poll_list);
+                       list_move_tail(&dev->poll_list, &queue->poll_list);
                        if (dev->quota < 0)
                                dev->quota += dev->weight;
                        else
                                dev->quota = dev->weight;
                } else {
+                       netpoll_poll_unlock(have);
                        dev_put(dev);
                        local_irq_disable();
                }
        }
 out:
+#ifdef CONFIG_NET_DMA
+       /*
+        * There may not be any more sk_buffs coming right now, so push
+        * any pending DMA copies to hardware
+        */
+       if (net_dma_client) {
+               struct dma_chan *chan;
+               rcu_read_lock();
+               list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
+                       dma_async_memcpy_issue_pending(chan);
+               rcu_read_unlock();
+       }
+#endif
        local_irq_enable();
        return;
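For context, a hypothetical old-style (2.6.18-era) ->poll() method of the kind net_rx_action() drives: consume up to min(*budget, dev->quota) packets, charge both counters, and return nonzero while work remains so the device stays on the poll list. The demo_ring_* helpers are stand-ins for hardware access:

static int demo_ring_has_rx(struct net_device *dev);            /* hw-specific */
static struct sk_buff *demo_ring_next_skb(struct net_device *dev);

static int demo_poll(struct net_device *dev, int *budget)
{
        int limit = min(*budget, dev->quota);
        int done = 0;

        while (done < limit && demo_ring_has_rx(dev)) {
                netif_receive_skb(demo_ring_next_skb(dev));
                done++;
        }

        *budget -= done;
        dev->quota -= done;

        if (demo_ring_has_rx(dev))
                return 1;               /* more work; keep polling */

        netif_rx_complete(dev);         /* take ourselves off the poll list */
        return 0;
}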
 
@@ -2043,7 +2112,8 @@ static int dev_ifconf(char __user *arg)
 
        total = 0;
        for (dev = dev_base; dev; dev = dev->next) {
-               if (!dev_in_nx_info(dev, current->nx_info))
+               if (vx_flags(VXF_HIDE_NETIF, 0) &&
+                       !dev_in_nx_info(dev, current->nx_info))
                        continue;
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
@@ -2107,7 +2177,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
        struct nx_info *nxi = current->nx_info;
 
-       if (!dev_in_nx_info(dev, nxi))
+       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
                return;
        if (dev->get_stats) {
                struct net_device_stats *stats = dev->get_stats(dev);
@@ -2183,15 +2253,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
        struct netif_rx_stats *s = v;
 
        seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-                  s->total, s->dropped, s->time_squeeze, s->throttled,
-                  s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
-                  s->fastroute_deferred_out,
-#if 0
-                  s->fastroute_latency_reduction
-#else
-                  s->cpu_collision
-#endif
-                 );
+                  s->total, s->dropped, s->time_squeeze, 0,
+                  0, 0, 0, 0, /* was fastroute */
+                  s->cpu_collision );
        return 0;
 }
 
@@ -2235,7 +2299,7 @@ static struct file_operations softnet_seq_fops = {
        .release = seq_release,
 };
 
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
 extern int wireless_proc_init(void);
 #else
 #define wireless_proc_init() 0
@@ -2309,7 +2373,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
  *     @dev: device
  *     @inc: modifier
  *
- *     Add or remove promsicuity from a device. While the count in the device
+ *     Add or remove promiscuity from a device. While the count in the device
  *     remains above zero the interface remains promiscuous. Once it hits zero
  *     the device reverts back to normal filtering operation. A negative inc
  *     value is used to drop promiscuity on the device.
@@ -2318,14 +2382,21 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
 {
        unsigned short old_flags = dev->flags;
 
-       dev->flags |= IFF_PROMISC;
        if ((dev->promiscuity += inc) == 0)
                dev->flags &= ~IFF_PROMISC;
-       if (dev->flags ^ old_flags) {
+       else
+               dev->flags |= IFF_PROMISC;
+       if (dev->flags != old_flags) {
                dev_mc_upload(dev);
                printk(KERN_INFO "device %s %s promiscuous mode\n",
                       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
                                                               "left");
+               audit_log(current->audit_context, GFP_ATOMIC,
+                       AUDIT_ANOM_PROMISCUOUS,
+                       "dev=%s prom=%d old_prom=%d auid=%u",
+                       dev->name, (dev->flags & IFF_PROMISC),
+                       (old_flags & IFF_PROMISC),
+                       audit_get_loginuid(current->audit_context));
        }
 }
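
/*
 * Illustrative sketch (hypothetical caller): promiscuity is a counter,
 * so paired +1/-1 calls from independent users nest safely.  IFF_PROMISC
 * is set only while the count is non-zero, and after this patch each
 * flag transition is also logged to the audit subsystem.  Callers
 * typically hold the rtnl semaphore.
 */
static void example_tap_attach_detach(struct net_device *dev)
{
        rtnl_lock();
        dev_set_promiscuity(dev, 1);    /* 0 -> 1: IFF_PROMISC set      */
        dev_set_promiscuity(dev, 1);    /* 1 -> 2: no flag change       */
        dev_set_promiscuity(dev, -1);   /* 2 -> 1: still promiscuous    */
        dev_set_promiscuity(dev, -1);   /* 1 -> 0: flag cleared, logged */
        rtnl_unlock();
}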
 
@@ -2358,12 +2429,20 @@ unsigned dev_get_flags(const struct net_device *dev)
 
        flags = (dev->flags & ~(IFF_PROMISC |
                                IFF_ALLMULTI |
-                               IFF_RUNNING)) | 
+                               IFF_RUNNING |
+                               IFF_LOWER_UP |
+                               IFF_DORMANT)) |
                (dev->gflags & (IFF_PROMISC |
                                IFF_ALLMULTI));
 
-       if (netif_running(dev) && netif_carrier_ok(dev))
-               flags |= IFF_RUNNING;
+       if (netif_running(dev)) {
+               if (netif_oper_up(dev))
+                       flags |= IFF_RUNNING;
+               if (netif_carrier_ok(dev))
+                       flags |= IFF_LOWER_UP;
+               if (netif_dormant(dev))
+                       flags |= IFF_DORMANT;
+       }
 
        return flags;
 }
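
/*
 * Illustrative sketch (hypothetical helper): how the flags map to
 * RFC 2863 operational state after this change.  IFF_LOWER_UP now
 * reports carrier, IFF_DORMANT a protocol hold (e.g. pending 802.1X
 * authentication), and IFF_RUNNING is reserved for the fully
 * operational state instead of doubling as a carrier indicator.
 */
static void example_report_state(struct net_device *dev)
{
        unsigned flags = dev_get_flags(dev);

        if (!(flags & IFF_LOWER_UP))
                printk(KERN_INFO "%s: no carrier\n", dev->name);
        else if (flags & IFF_DORMANT)
                printk(KERN_INFO "%s: up, but dormant\n", dev->name);
        else if (flags & IFF_RUNNING)
                printk(KERN_INFO "%s: operationally up\n", dev->name);
}
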
@@ -2406,7 +2485,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
        if (dev->flags & IFF_UP &&
            ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
                                          IFF_VOLATILE)))
-               notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+               raw_notifier_call_chain(&netdev_chain,
+                               NETDEV_CHANGE, dev);
 
        if ((flags ^ dev->gflags) & IFF_PROMISC) {
                int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2450,11 +2530,27 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
        else
                dev->mtu = new_mtu;
        if (!err && dev->flags & IFF_UP)
-               notifier_call_chain(&netdev_chain,
-                                   NETDEV_CHANGEMTU, dev);
+               raw_notifier_call_chain(&netdev_chain,
+                               NETDEV_CHANGEMTU, dev);
        return err;
 }
 
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+{
+       int err;
+
+       if (!dev->set_mac_address)
+               return -EOPNOTSUPP;
+       if (sa->sa_family != dev->type)
+               return -EINVAL;
+       if (!netif_device_present(dev))
+               return -ENODEV;
+       err = dev->set_mac_address(dev, sa);
+       if (!err)
+               raw_notifier_call_chain(&netdev_chain,
+                               NETDEV_CHANGEADDR, dev);
+       return err;
+}
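
/*
 * Illustrative sketch (hypothetical wrapper): callers fill a sockaddr
 * whose family must match dev->type and hold the rtnl semaphore so the
 * NETDEV_CHANGEADDR notification is serialized.  Assumes dev->addr_len
 * fits in sa_data, true for Ethernet's 6 bytes.
 */
static int example_set_mac(struct net_device *dev, const u8 *addr)
{
        struct sockaddr sa;
        int err;

        sa.sa_family = dev->type;               /* else -EINVAL */
        memcpy(sa.sa_data, addr, dev->addr_len);

        rtnl_lock();
        err = dev_set_mac_address(dev, &sa);    /* fires NETDEV_CHANGEADDR */
        rtnl_unlock();
        return err;
}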
 
 /*
  *     Perform the SIOCxIFxxx calls.
@@ -2492,30 +2588,23 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
                        return dev_set_mtu(dev, ifr->ifr_mtu);
 
                case SIOCGIFHWADDR:
-                       memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
-                              min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+                       if (!dev->addr_len)
+                               memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+                       else
+                               memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+                                      min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
                        ifr->ifr_hwaddr.sa_family = dev->type;
                        return 0;
 
                case SIOCSIFHWADDR:
-                       if (!dev->set_mac_address)
-                               return -EOPNOTSUPP;
-                       if (ifr->ifr_hwaddr.sa_family != dev->type)
-                               return -EINVAL;
-                       if (!netif_device_present(dev))
-                               return -ENODEV;
-                       err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
-                       if (!err)
-                               notifier_call_chain(&netdev_chain,
-                                                   NETDEV_CHANGEADDR, dev);
-                       return err;
+                       return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
 
                case SIOCSIFHWBROADCAST:
                        if (ifr->ifr_hwaddr.sa_family != dev->type)
                                return -EINVAL;
                        memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
                               min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
-                       notifier_call_chain(&netdev_chain,
+                       raw_notifier_call_chain(&netdev_chain,
                                            NETDEV_CHANGEADDR, dev);
                        return 0;
 
@@ -2634,9 +2723,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
         */
 
        if (cmd == SIOCGIFCONF) {
-               rtnl_shlock();
+               rtnl_lock();
                ret = dev_ifconf((char __user *) arg);
-               rtnl_shunlock();
+               rtnl_unlock();
                return ret;
        }
        if (cmd == SIOCGIFNAME)
@@ -2741,13 +2830,14 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
                case SIOCBONDENSLAVE:
                case SIOCBONDRELEASE:
                case SIOCBONDSETHWADDR:
-               case SIOCBONDSLAVEINFOQUERY:
-               case SIOCBONDINFOQUERY:
                case SIOCBONDCHANGEACTIVE:
                case SIOCBRADDIF:
                case SIOCBRDELIF:
                        if (!capable(CAP_NET_ADMIN))
                                return -EPERM;
+                       /* fall through */
+               case SIOCBONDSLAVEINFOQUERY:
+               case SIOCBONDINFOQUERY:
                        dev_load(ifr.ifr_name);
                        rtnl_lock();
                        ret = dev_ifsioc(&ifr, cmd);
@@ -2779,13 +2869,14 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
                                        ret = -EFAULT;
                                return ret;
                        }
-#ifdef WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT
                        /* Take care of Wireless Extensions */
                        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
                                /* If command is `set a parameter', or
                                 * `get the encoding parameters', check if
                                 * the user has the right to do it */
-                               if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+                               if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
+                                   || cmd == SIOCGIWENCODEEXT) {
                                        if (!capable(CAP_NET_ADMIN))
                                                return -EPERM;
                                }
@@ -2794,13 +2885,13 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
                                /* Follow me in net/core/wireless.c */
                                ret = wireless_process_ioctl(&ifr, cmd);
                                rtnl_unlock();
-                               if (!ret && IW_IS_GET(cmd) &&
+                               if (IW_IS_GET(cmd) &&
                                    copy_to_user(arg, &ifr,
                                                 sizeof(struct ifreq)))
                                        ret = -EFAULT;
                                return ret;
                        }
-#endif /* WIRELESS_EXT */
+#endif /* CONFIG_WIRELESS_EXT */
                        return -EINVAL;
        }
 }
@@ -2813,7 +2904,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
  *     number.  The caller must hold the rtnl semaphore or the
  *     dev_base_lock to be sure it remains unique.
  */
-int dev_new_index(void)
+static int dev_new_index(void)
 {
        static int ifindex;
        for (;;) {
@@ -2827,7 +2918,7 @@ int dev_new_index(void)
 static int dev_boot_phase = 1;
 
 /* Delayed registration/unregisteration */
-static spinlock_t net_todo_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(net_todo_list_lock);
 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
 
 static inline void net_set_todo(struct net_device *dev)
@@ -2846,8 +2937,7 @@ static inline void net_set_todo(struct net_device *dev)
  *     chain. 0 is returned on success. A negative errno code is returned
  *     on a failure to set up the device, or if the name is a duplicate.
  *
- *     Callers must hold the rtnl semaphore.  See the comment at the
- *     end of Space.c for details about the locking.  You may want
+ *     Callers must hold the rtnl semaphore. You may want
  *     register_netdev() instead of this.
  *
  *     BUGS:
@@ -2864,20 +2954,18 @@ int register_netdevice(struct net_device *dev)
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
 
+       might_sleep();
+
        /* When net_device's are persistent, this will be fatal. */
        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
        spin_lock_init(&dev->queue_lock);
-       spin_lock_init(&dev->xmit_lock);
+       spin_lock_init(&dev->_xmit_lock);
        dev->xmit_lock_owner = -1;
 #ifdef CONFIG_NET_CLS_ACT
        spin_lock_init(&dev->ingress_lock);
 #endif
 
-       ret = alloc_divert_blk(dev);
-       if (ret)
-               goto out;
-
        dev->iflink = -1;
 
        /* Init, if this function is available */
@@ -2886,13 +2974,13 @@ int register_netdevice(struct net_device *dev)
                if (ret) {
                        if (ret > 0)
                                ret = -EIO;
-                       goto out_err;
+                       goto out;
                }
        }
  
        if (!dev_valid_name(dev->name)) {
                ret = -EINVAL;
-               goto out_err;
+               goto out;
        }
 
        dev->ifindex = dev_new_index();
@@ -2906,20 +2994,40 @@ int register_netdevice(struct net_device *dev)
                        = hlist_entry(p, struct net_device, name_hlist);
                if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
                        ret = -EEXIST;
-                       goto out_err;
+                       goto out;
                }
        }
 
        /* Fix illegal SG+CSUM combinations. */
        if ((dev->features & NETIF_F_SG) &&
-           !(dev->features & (NETIF_F_IP_CSUM |
-                              NETIF_F_NO_CSUM |
-                              NETIF_F_HW_CSUM))) {
-               printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+           !(dev->features & NETIF_F_ALL_CSUM)) {
+               printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
                       dev->name);
                dev->features &= ~NETIF_F_SG;
        }
 
+       /* TSO requires that SG is present as well. */
+       if ((dev->features & NETIF_F_TSO) &&
+           !(dev->features & NETIF_F_SG)) {
+               printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
+                      dev->name);
+               dev->features &= ~NETIF_F_TSO;
+       }
+       if (dev->features & NETIF_F_UFO) {
+               if (!(dev->features & NETIF_F_HW_CSUM)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_HW_CSUM feature.\n",
+                                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+               if (!(dev->features & NETIF_F_SG)) {
+                       printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+                                       "NETIF_F_SG feature.\n",
+                                       dev->name);
+                       dev->features &= ~NETIF_F_UFO;
+               }
+       }
+
        /*
         *      nil rebuild_header routine,
         *      that should be never called and used as just bug trap.
@@ -2928,6 +3036,11 @@ int register_netdevice(struct net_device *dev)
        if (!dev->rebuild_header)
                dev->rebuild_header = default_rebuild_header;
 
+       ret = netdev_register_sysfs(dev);
+       if (ret)
+               goto out;
+       dev->reg_state = NETREG_REGISTERED;
+
        /*
         *      Default initial state at registry is that the
         *      device is present.
@@ -2943,23 +3056,53 @@ int register_netdevice(struct net_device *dev)
        hlist_add_head(&dev->name_hlist, head);
        hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
        dev_hold(dev);
-       dev->reg_state = NETREG_REGISTERING;
        write_unlock_bh(&dev_base_lock);
 
        /* Notify protocols, that a new device appeared. */
-       notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+       raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
 
-       /* Finish registration after unlock */
-       net_set_todo(dev);
        ret = 0;
 
 out:
        return ret;
-out_err:
-       free_divert_blk(dev);
-       goto out;
 }
 
+/**
+ *     register_netdev - register a network device
+ *     @dev: device to register
+ *
+ *     Take a completed network device structure and add it to the kernel
+ *     interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *     chain. 0 is returned on success. A negative errno code is returned
+ *     on a failure to set up the device, or if the name is a duplicate.
+ *
+ *     This is a wrapper around register_netdevice that takes the rtnl semaphore
+ *     and expands the device name if you passed a format string to
+ *     alloc_netdev.
+ */
+int register_netdev(struct net_device *dev)
+{
+       int err;
+
+       rtnl_lock();
+
+       /*
+        * If the name is a format string the caller wants us to do a
+        * name allocation.
+        */
+       if (strchr(dev->name, '%')) {
+               err = dev_alloc_name(dev, dev->name);
+               if (err < 0)
+                       goto out;
+       }
+
+       err = register_netdevice(dev);
+out:
+       rtnl_unlock();
+       return err;
+}
+EXPORT_SYMBOL(register_netdev);
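
/*
 * Illustrative sketch (all names hypothetical): the intended pairing of
 * alloc_netdev() (defined further down), register_netdev() and
 * free_netdev().  The "%d" in the name asks register_netdev() to expand
 * it via dev_alloc_name(): example0, example1, ...
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

struct example_priv {
        int placeholder;        /* driver state would live here */
};

static int __init example_init(void)
{
        struct net_device *dev;
        int err;

        dev = alloc_netdev(sizeof(struct example_priv), "example%d",
                           ether_setup);        /* Ethernet defaults */
        if (!dev)
                return -ENOMEM;

        err = register_netdev(dev);
        if (err)
                free_netdev(dev);       /* still owned by us on failure */
        return err;
}
/* Teardown in a real driver: unregister_netdev(dev); free_netdev(dev); */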
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
@@ -2978,10 +3121,10 @@ static void netdev_wait_allrefs(struct net_device *dev)
        rebroadcast_time = warning_time = jiffies;
        while (atomic_read(&dev->refcnt) != 0) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
-                       rtnl_shlock();
+                       rtnl_lock();
 
                        /* Rebroadcast unregister notification */
-                       notifier_call_chain(&netdev_chain,
+                       raw_notifier_call_chain(&netdev_chain,
                                            NETDEV_UNREGISTER, dev);
 
                        if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
@@ -2995,13 +3138,12 @@ static void netdev_wait_allrefs(struct net_device *dev)
                                linkwatch_run_queue();
                        }
 
-                       rtnl_shunlock();
+                       __rtnl_unlock();
 
                        rebroadcast_time = jiffies;
                }
 
-               current->state = TASK_INTERRUPTIBLE;
-               schedule_timeout(HZ / 4);
+               msleep(250);
 
                if (time_after(jiffies, warning_time + 10 * HZ)) {
                        printk(KERN_EMERG "unregister_netdevice: "
@@ -3029,20 +3171,18 @@ static void netdev_wait_allrefs(struct net_device *dev)
  *
  * We are invoked by rtnl_unlock() after it drops the semaphore.
  * This allows us to deal with problems:
- * 1) We can create/delete sysfs objects which invoke hotplug
+ * 1) We can delete sysfs objects which invoke hotplug
  *    without deadlocking with linkwatch via keventd.
  * 2) Since we run with the RTNL semaphore not held, we can sleep
  *    safely in order to wait for the netdev refcnt to drop to zero.
  */
-static DECLARE_MUTEX(net_todo_run_mutex);
+static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
-       struct list_head list = LIST_HEAD_INIT(list);
-       int err;
-
+       struct list_head list;
 
        /* Need to guard against multiple cpu's getting out of order. */
-       down(&net_todo_run_mutex);
+       mutex_lock(&net_todo_run_mutex);
 
        /* Not safe to do outside the semaphore.  We must not return
         * until all unregister events invoked by the local processor
@@ -3054,53 +3194,83 @@ void netdev_run_todo(void)
 
        /* Snapshot list, allow later requests */
        spin_lock(&net_todo_list_lock);
-       list_splice_init(&net_todo_list, &list);
+       list_replace_init(&net_todo_list, &list);
        spin_unlock(&net_todo_list_lock);
-               
+
        while (!list_empty(&list)) {
                struct net_device *dev
                        = list_entry(list.next, struct net_device, todo_list);
                list_del(&dev->todo_list);
 
-               switch(dev->reg_state) {
-               case NETREG_REGISTERING:
-                       err = netdev_register_sysfs(dev);
-                       if (err)
-                               printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
-                                      dev->name, err);
-                       dev->reg_state = NETREG_REGISTERED;
-                       break;
+               if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
+                       printk(KERN_ERR "network todo '%s' but state %d\n",
+                              dev->name, dev->reg_state);
+                       dump_stack();
+                       continue;
+               }
 
-               case NETREG_UNREGISTERING:
-                       netdev_unregister_sysfs(dev);
-                       dev->reg_state = NETREG_UNREGISTERED;
+               netdev_unregister_sysfs(dev);
+               dev->reg_state = NETREG_UNREGISTERED;
 
-                       netdev_wait_allrefs(dev);
+               netdev_wait_allrefs(dev);
 
-                       /* paranoia */
-                       BUG_ON(atomic_read(&dev->refcnt));
-                       BUG_TRAP(!dev->ip_ptr);
-                       BUG_TRAP(!dev->ip6_ptr);
-                       BUG_TRAP(!dev->dn_ptr);
+               /* paranoia */
+               BUG_ON(atomic_read(&dev->refcnt));
+               BUG_TRAP(!dev->ip_ptr);
+               BUG_TRAP(!dev->ip6_ptr);
+               BUG_TRAP(!dev->dn_ptr);
 
+               /* It must be the very last action,
+                * after this 'dev' may point to freed up memory.
+                */
+               if (dev->destructor)
+                       dev->destructor(dev);
+       }
 
-                       /* It must be the very last action, 
-                        * after this 'dev' may point to freed up memory.
-                        */
-                       if (dev->destructor)
-                               dev->destructor(dev);
-                       break;
+out:
+       mutex_unlock(&net_todo_run_mutex);
+}
 
-               default:
-                       printk(KERN_ERR "network todo '%s' but state %d\n",
-                              dev->name, dev->reg_state);
-                       break;
-               }
+/**
+ *     alloc_netdev - allocate network device
+ *     @sizeof_priv:   size of private data to allocate space for
+ *     @name:          device name format string
+ *     @setup:         callback to initialize device
+ *
+ *     Allocates a struct net_device with private data area for driver use
+ *     and performs basic initialization.
+ */
+struct net_device *alloc_netdev(int sizeof_priv, const char *name,
+               void (*setup)(struct net_device *))
+{
+       void *p;
+       struct net_device *dev;
+       int alloc_size;
+
+       BUG_ON(strlen(name) >= sizeof(dev->name));
+
+       /* ensure 32-byte alignment of both the device and private area */
+       alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+       alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+       p = kzalloc(alloc_size, GFP_KERNEL);
+       if (!p) {
+               printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
+               return NULL;
        }
 
-out:
-       up(&net_todo_run_mutex);
+       dev = (struct net_device *)
+               (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+       dev->padded = (char *)dev - (char *)p;
+
+       if (sizeof_priv)
+               dev->priv = netdev_priv(dev);
+
+       setup(dev);
+       strcpy(dev->name, name);
+       return dev;
 }
+EXPORT_SYMBOL(alloc_netdev);
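
/*
 * Worked example (hypothetical sizes), with NETDEV_ALIGN = 32 and
 * NETDEV_ALIGN_CONST = 31:
 *
 *   sizeof(*dev) = 1100  ->  rounded: (1100 + 31) & ~31 = 1120
 *   sizeof_priv  =  200  ->  alloc_size = 1120 + 200 + 31 = 1351
 *
 * kzalloc() may return p at any alignment; rounding p up with the same
 * mask puts dev on a 32-byte boundary, and because the dev portion was
 * itself rounded to 1120 bytes, netdev_priv() lands aligned as well.
 * dev->padded records the round-up so free_netdev() can recover p.
 */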
 
 /**
  *     free_netdev - free network device
@@ -3113,7 +3283,7 @@ out:
 void free_netdev(struct net_device *dev)
 {
 #ifdef CONFIG_SYSFS
-       /*  Compatiablity with error handling in drivers */
+       /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                kfree((char *)dev - dev->padded);
                return;
@@ -3133,7 +3303,7 @@ void free_netdev(struct net_device *dev)
 void synchronize_net(void) 
 {
        might_sleep();
-       synchronize_kernel();
+       synchronize_rcu();
 }
 
 /**
@@ -3144,8 +3314,7 @@ void synchronize_net(void)
  *     from the kernel tables. On success 0 is returned, on a failure
  *     a negative errno code is returned.
  *
- *     Callers must hold the rtnl semaphore.  See the comment at the
- *     end of Space.c for details about the locking.  You may want
+ *     Callers must hold the rtnl semaphore.  You may want
  *     unregister_netdev() instead of this.
  */
 
@@ -3199,7 +3368,7 @@ int unregister_netdevice(struct net_device *dev)
        /* Notify protocols, that we are about to destroy
           this device. They should clean all the things.
        */
-       notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+       raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
        
        /*
         *      Flush the multicast chain
@@ -3212,8 +3381,6 @@ int unregister_netdevice(struct net_device *dev)
        /* Notifier chain MUST detach us from master device. */
        BUG_TRAP(!dev->master);
 
-       free_divert_blk(dev);
-
        /* Finish processing unregister after unlock */
        net_set_todo(dev);
 
@@ -3223,7 +3390,27 @@ int unregister_netdevice(struct net_device *dev)
        return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+/**
+ *     unregister_netdev - remove device from the kernel
+ *     @dev: device
+ *
+ *     This function shuts down a device interface and removes it
+ *     from the kernel tables. On success 0 is returned, on a failure
+ *     a negative errno code is returned.
+ *
+ *     This is just a wrapper for unregister_netdevice that takes
+ *     the rtnl semaphore.  In general you want to use this and not
+ *     unregister_netdevice.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+       rtnl_lock();
+       unregister_netdevice(dev);
+       rtnl_unlock();
+}
+
+EXPORT_SYMBOL(unregister_netdev);
+
 static int dev_cpu_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *ocpu)
@@ -3267,8 +3454,84 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 
        return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
+#ifdef CONFIG_NET_DMA
+/**
+ * net_dma_rebalance - redistribute the allocated channels over online CPUs
+ * This is called when the number of channels allocated to the net_dma_client
+ * changes.  The net_dma_client tries to have one DMA channel per CPU.
+ */
+static void net_dma_rebalance(void)
+{
+       unsigned int cpu, i, n;
+       struct dma_chan *chan;
+
+       if (net_dma_count == 0) {
+               for_each_online_cpu(cpu)
+                       rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
+               return;
+       }
+
+       i = 0;
+       cpu = first_cpu(cpu_online_map);
+
+       rcu_read_lock();
+       list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+               n = ((num_online_cpus() / net_dma_count)
+                  + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
+
+               while (n) {
+                       per_cpu(softnet_data, cpu).net_dma = chan;
+                       cpu = next_cpu(cpu, cpu_online_map);
+                       n--;
+               }
+               i++;
+       }
+       rcu_read_unlock();
+}
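
/*
 * Worked example (hypothetical counts): 6 online CPUs, 4 channels.
 *   base = 6 / 4 = 1, remainder = 6 % 4 = 2
 *   chan 0: n = 1 + 1 = 2  ->  CPUs 0,1
 *   chan 1: n = 1 + 1 = 2  ->  CPUs 2,3
 *   chan 2: n = 1 + 0 = 1  ->  CPU 4
 *   chan 3: n = 1 + 0 = 1  ->  CPU 5
 * The first (cpus % channels) channels absorb the remainder, so every
 * online CPU ends up referencing exactly one channel.
 */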
+
+/**
+ * netdev_dma_event - event callback for the net_dma_client
+ * @client: should always be net_dma_client
+ * @chan: DMA channel for the event
+ * @event: event type
+ */
+static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+       enum dma_event event)
+{
+       spin_lock(&net_dma_event_lock);
+       switch (event) {
+       case DMA_RESOURCE_ADDED:
+               net_dma_count++;
+               net_dma_rebalance();
+               break;
+       case DMA_RESOURCE_REMOVED:
+               net_dma_count--;
+               net_dma_rebalance();
+               break;
+       default:
+               break;
+       }
+       spin_unlock(&net_dma_event_lock);
+}
+
+/**
+ * netdev_dma_register - register the networking subsystem as a DMA client
+ */
+static int __init netdev_dma_register(void)
+{
+       spin_lock_init(&net_dma_event_lock);
+       net_dma_client = dma_async_client_register(netdev_dma_event);
+       if (net_dma_client == NULL)
+               return -ENOMEM;
+
+       dma_async_client_chan_request(net_dma_client, num_online_cpus());
+       return 0;
+}
+
+#else
+static int __init netdev_dma_register(void) { return -ENODEV; }
+#endif /* CONFIG_NET_DMA */
 
 /*
  *     Initialize the DEV module. At boot time this walks the device list and
@@ -3307,14 +3570,11 @@ static int __init net_dev_init(void)
         *      Initialise the packet receive queues.
         */
 
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_possible_cpu(i) {
                struct softnet_data *queue;
 
                queue = &per_cpu(softnet_data, i);
                skb_queue_head_init(&queue->input_pkt_queue);
-               queue->throttle = 0;
-               queue->cng_level = 0;
-               queue->avg_blog = 10; /* arbitrary non-zero */
                queue->completion_queue = NULL;
                INIT_LIST_HEAD(&queue->poll_list);
                set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
@@ -3323,10 +3583,7 @@ static int __init net_dev_init(void)
                atomic_set(&queue->backlog_dev.refcnt, 1);
        }
 
-#ifdef OFFLINE_SAMPLE
-       samp_timer.expires = jiffies + (10 * HZ);
-       add_timer(&samp_timer);
-#endif
+       netdev_dma_register();
 
        dev_boot_phase = 0;
 
@@ -3343,30 +3600,24 @@ out:
 
 subsys_initcall(net_dev_init);
 
-EXPORT_SYMBOL(__dev_get);
-EXPORT_SYMBOL(__dev_get_by_flags);
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
-EXPORT_SYMBOL(call_netdevice_notifiers);
+EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
 EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
-EXPORT_SYMBOL(dev_getbyhwaddr);
-EXPORT_SYMBOL(dev_ioctl);
-EXPORT_SYMBOL(dev_new_index);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
-EXPORT_SYMBOL(dev_queue_xmit_nit);
 EXPORT_SYMBOL(dev_remove_pack);
 EXPORT_SYMBOL(dev_set_allmulti);
 EXPORT_SYMBOL(dev_set_promiscuity);
 EXPORT_SYMBOL(dev_change_flags);
 EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(dev_set_mac_address);
 EXPORT_SYMBOL(free_netdev);
 EXPORT_SYMBOL(netdev_boot_setup_check);
 EXPORT_SYMBOL(netdev_set_master);
@@ -3380,24 +3631,19 @@ EXPORT_SYMBOL(skb_checksum_help);
 EXPORT_SYMBOL(synchronize_net);
 EXPORT_SYMBOL(unregister_netdevice);
 EXPORT_SYMBOL(unregister_netdevice_notifier);
+EXPORT_SYMBOL(net_enable_timestamp);
+EXPORT_SYMBOL(net_disable_timestamp);
+EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(skb_checksum_setup);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_fdb_get_hook);
+EXPORT_SYMBOL(br_fdb_put_hook);
 #endif
 
 #ifdef CONFIG_KMOD
 EXPORT_SYMBOL(dev_load);
 #endif
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-EXPORT_SYMBOL(netdev_dropping);
-EXPORT_SYMBOL(netdev_fc_xoff);
-EXPORT_SYMBOL(netdev_register_fc);
-EXPORT_SYMBOL(netdev_unregister_fc);
-#endif
-
-#ifdef CONFIG_NET_CLS_ACT
-EXPORT_SYMBOL(ing_filter);
-#endif
-
 
 EXPORT_PER_CPU_SYMBOL(softnet_data);