X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fcore%2Fdev.c;h=d8bab16e9a61562b6e82b64b23470801fd34b509;hb=4e76c8a9fa413ccc09d3f7f664183dcce3555d57;hp=43c273e74a7d314639e705476a9106342154a224;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/core/dev.c b/net/core/dev.c index 43c273e74..d8bab16e9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. * * Derived from the non IP parts of dev.c 1.0.19 - * Authors: Ross Biro, + * Authors: Ross Biro * Fred N. van Kempen, * Mark Evans, * @@ -74,12 +74,14 @@ #include #include -#include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -107,23 +109,20 @@ #include #include #include -#ifdef CONFIG_NET_RADIO -#include /* Note : will define WIRELESS_EXT */ +#include +#include +#include #include -#endif /* CONFIG_NET_RADIO */ #include - -/* This define, if set, will randomly drop a packet when congestion - * is more than moderate. It helps fairness in the multi-interface - * case when one of them is a hog, but it kills performance for the - * single interface case so it is off now by default. - */ -#undef RAND_LIE - -/* Setting this will sample the queue lengths and thus congestion - * via a timer instead of as each packet is received. - */ -#undef OFFLINE_SAMPLE +#include +#include +#include + +#ifdef CONFIG_XEN +#include +#include +#include +#endif /* * The list of packet types we will receive (as opposed to discard) @@ -136,7 +135,7 @@ * sure which should go first, but I bet it won't make much * difference if we are running VLANs. The good news is that * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversly affected. + * the average user (w/out VLANs) will not be adversely affected. * --BLG * * 0800 IP @@ -153,17 +152,12 @@ * 86DD IPv6 */ -static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[16]; /* 16 way hashed list */ static struct list_head ptype_all; /* Taps */ -#ifdef OFFLINE_SAMPLE -static void sample_queue(unsigned long dummy); -static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); -#endif - /* - * The @dev_base list is protected by @dev_base_lock and the rtln + * The @dev_base list is protected by @dev_base_lock and the rtnl * semaphore. * * Pure readers hold dev_base_lock for reading. @@ -182,8 +176,8 @@ static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); * semaphore held. */ struct net_device *dev_base; -struct net_device **dev_tail = &dev_base; -rwlock_t dev_base_lock = RW_LOCK_UNLOCKED; +static struct net_device **dev_tail = &dev_base; +DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base); EXPORT_SYMBOL(dev_base_lock); @@ -207,22 +201,23 @@ static inline struct hlist_head *dev_index_hash(int ifindex) * Our notifier list */ -static struct notifier_block *netdev_chain; +static RAW_NOTIFIER_HEAD(netdev_chain); /* * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. 
*/ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; - -#ifdef CONFIG_NET_FASTROUTE -int netdev_fastroute; -int netdev_fastroute_obstacles; -#endif +DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; +#ifdef CONFIG_SYSFS extern int netdev_sysfs_init(void); extern int netdev_register_sysfs(struct net_device *); -extern int netdev_unregister_sysfs(struct net_device *); +extern void netdev_unregister_sysfs(struct net_device *); +#else +#define netdev_sysfs_init() (0) +#define netdev_register_sysfs(dev) (0) +#define netdev_unregister_sysfs(dev) do { } while(0) +#endif /******************************************************************************* @@ -271,12 +266,6 @@ void dev_add_pack(struct packet_type *pt) int hash; spin_lock_bh(&ptype_lock); -#ifdef CONFIG_NET_FASTROUTE - if (pt->af_packet_priv) { - netdev_fastroute_obstacles++; - dev_clear_fastroute(pt->dev); - } -#endif if (pt->type == htons(ETH_P_ALL)) { netdev_nit++; list_add_rcu(&pt->list, &ptype_all); @@ -287,10 +276,6 @@ void dev_add_pack(struct packet_type *pt) spin_unlock_bh(&ptype_lock); } -extern void linkwatch_run_queue(void); - - - /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -319,10 +304,6 @@ void __dev_remove_pack(struct packet_type *pt) list_for_each_entry(pt1, head, list) { if (pt == pt1) { -#ifdef CONFIG_NET_FASTROUTE - if (pt->af_packet_priv) - netdev_fastroute_obstacles--; -#endif list_del_rcu(&pt->list); goto out; } @@ -369,7 +350,7 @@ static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; * returns 0 on error and 1 on success. This is a generic routine to * all netdevices. */ -int netdev_boot_setup_add(char *name, struct ifmap *map) +static int netdev_boot_setup_add(char *name, struct ifmap *map) { struct netdev_boot_setup *s; int i; @@ -528,35 +509,6 @@ struct net_device *dev_get_by_name(const char *name) return dev; } -/* - Return value is changed to int to prevent illegal usage in future. - It is still legal to use to check for device existence. - - User should understand, that the result returned by this function - is meaningless, if it was not issued under rtnl semaphore. - */ - -/** - * dev_get - test if a device exists - * @name: name to test for - * - * Test if a name exists. Returns true if the name is found. In order - * to be sure the name is not allocated or removed during the test the - * caller must hold the rtnl semaphore. - * - * This function exists only for back compatibility with older - * drivers. 
- */ -int __dev_get(const char *name) -{ - struct net_device *dev; - - read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); - read_unlock(&dev_base_lock); - return dev != NULL; -} - /** * __dev_get_by_index - find a device by its ifindex * @ifindex: index of device @@ -631,26 +583,19 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) return dev; } -struct net_device *__dev_getfirstbyhwtype(unsigned short type) -{ - struct net_device *dev; - - for (dev = dev_base; dev; dev = dev->next) - if (dev->type == type) - break; - return dev; -} - -EXPORT_SYMBOL(__dev_getfirstbyhwtype); +EXPORT_SYMBOL(dev_getbyhwaddr); struct net_device *dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); - if (dev) - dev_hold(dev); + for (dev = dev_base; dev; dev = dev->next) { + if (dev->type == type) { + dev_hold(dev); + break; + } + } rtnl_unlock(); return dev; } @@ -673,32 +618,14 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_flags(if_flags, mask); - if (dev) - dev_hold(dev); - read_unlock(&dev_base_lock); - return dev; -} - -/** - * __dev_get_by_flags - find any device with given flags - * @if_flags: IFF_* values - * @mask: bitmask of bits in if_flags to check - * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The caller must hold either - * the RTNL semaphore or @dev_base_lock. - */ - -struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask) -{ - struct net_device *dev; - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (((dev->flags ^ if_flags) & mask) == 0) - return dev; + if (((dev->flags ^ if_flags) & mask) == 0) { + dev_hold(dev); + break; + } } - return NULL; + read_unlock(&dev_base_lock); + return dev; } /** @@ -722,10 +649,12 @@ int dev_valid_name(const char *name) * @name: name format string * * Passed a format string - eg "lt%d" it will try and find a suitable - * id. Not efficient for many devices, not called a lot. The caller - * must hold the dev_base or rtnl lock while allocating the name and - * adding the device in order to avoid duplicates. Returns the number - * of the unit assigned or a negative errno code. + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. */ int dev_alloc_name(struct net_device *dev, const char *name) @@ -785,13 +714,15 @@ int dev_alloc_name(struct net_device *dev, const char *name) /** * dev_change_name - change name of a device * @dev: device - * @name: name (or format string) must be at least IFNAMSIZ + * @newname: name (or format string) must be at least IFNAMSIZ * * Change name of a device, can pass format strings "eth%d". * for wildcarding. 
*/ int dev_change_name(struct net_device *dev, char *newname) { + int err = 0; + ASSERT_RTNL(); if (dev->flags & IFF_UP) @@ -801,7 +732,7 @@ int dev_change_name(struct net_device *dev, char *newname) return -EINVAL; if (strchr(newname, '%')) { - int err = dev_alloc_name(dev, newname); + err = dev_alloc_name(dev, newname); if (err < 0) return err; strcpy(newname, dev->name); @@ -811,14 +742,29 @@ int dev_change_name(struct net_device *dev, char *newname) else strlcpy(dev->name, newname, IFNAMSIZ); - hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + err = class_device_rename(&dev->class_dev, dev->name); + if (!err) { + hlist_del(&dev->name_hlist); + hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + raw_notifier_call_chain(&netdev_chain, + NETDEV_CHANGENAME, dev); + } - class_device_rename(&dev->class_dev, dev->name); - notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); - return 0; + return err; } +/** + * netdev_features_change - device changes features + * @dev: device to cause notification + * + * Called to indicate a device has changed features. + */ +void netdev_features_change(struct net_device *dev) +{ + raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL(netdev_features_change); + /** * netdev_state_change - device changes state * @dev: device to cause notification @@ -830,7 +776,8 @@ int dev_change_name(struct net_device *dev, char *newname) void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + raw_notifier_call_chain(&netdev_chain, + NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } @@ -865,18 +812,6 @@ static int default_rebuild_header(struct sk_buff *skb) } -/* - * Some old buggy device drivers change get_stats after registering - * the device. Try and trap them here. - * This can be elimnated when all devices are known fixed. - */ -static inline int get_stats_changed(struct net_device *dev) -{ - int changed = dev->last_stats != dev->get_stats; - dev->last_stats = dev->get_stats; - return changed; -} - /** * dev_open - prepare an interface for use. * @dev: device to open @@ -900,14 +835,6 @@ int dev_open(struct net_device *dev) if (dev->flags & IFF_UP) return 0; - /* - * Check for broken device drivers. - */ - if (get_stats_changed(dev) && net_ratelimit()) { - printk(KERN_ERR "%s: driver changed get_stats after register\n", - dev->name); - } - /* * Is it even present? */ @@ -924,14 +851,6 @@ int dev_open(struct net_device *dev) clear_bit(__LINK_STATE_START, &dev->state); } - /* - * Check for more broken device drivers. - */ - if (get_stats_changed(dev) && net_ratelimit()) { - printk(KERN_ERR "%s: driver changed get_stats in open\n", - dev->name); - } - /* * If it went open OK then: */ @@ -955,44 +874,11 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. 
*/ - notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); } return ret; } -#ifdef CONFIG_NET_FASTROUTE - -static void dev_do_clear_fastroute(struct net_device *dev) -{ - if (dev->accept_fastpath) { - int i; - - for (i = 0; i <= NETDEV_FASTROUTE_HMASK; i++) { - struct dst_entry *dst; - - write_lock_irq(&dev->fastpath_lock); - dst = dev->fastpath[i]; - dev->fastpath[i] = NULL; - write_unlock_irq(&dev->fastpath_lock); - - dst_release(dst); - } - } -} - -void dev_clear_fastroute(struct net_device *dev) -{ - if (dev) { - dev_do_clear_fastroute(dev); - } else { - read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) - dev_do_clear_fastroute(dev); - read_unlock(&dev_base_lock); - } -} -#endif - /** * dev_close - shutdown an interface. * @dev: device to shutdown @@ -1011,7 +897,7 @@ int dev_close(struct net_device *dev) * Tell people we are going down, so that they can * prepare to death, when device is still operating. */ - notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); dev_deactivate(dev); @@ -1026,8 +912,7 @@ int dev_close(struct net_device *dev) smp_mb__after_clear_bit(); /* Commit netif_running(). */ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { /* No hurry. */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + msleep(1); } /* @@ -1045,14 +930,11 @@ int dev_close(struct net_device *dev) */ dev->flags &= ~IFF_UP; -#ifdef CONFIG_NET_FASTROUTE - dev_clear_fastroute(dev); -#endif /* * Tell people we are down */ - notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); return 0; } @@ -1083,7 +965,7 @@ int register_netdevice_notifier(struct notifier_block *nb) int err; rtnl_lock(); - err = notifier_chain_register(&netdev_chain, nb); + err = raw_notifier_chain_register(&netdev_chain, nb); if (!err) { for (dev = dev_base; dev; dev = dev->next) { nb->notifier_call(nb, NETDEV_REGISTER, dev); @@ -1108,7 +990,12 @@ int register_netdevice_notifier(struct notifier_block *nb) int unregister_netdevice_notifier(struct notifier_block *nb) { - return notifier_chain_unregister(&netdev_chain, nb); + int err; + + rtnl_lock(); + err = raw_notifier_chain_unregister(&netdev_chain, nb); + rtnl_unlock(); + return err; } /** @@ -1117,12 +1004,44 @@ int unregister_netdevice_notifier(struct notifier_block *nb) * @v: pointer passed unmodified to notifier function * * Call all network notifier blocks. Parameters and return value - * are as for notifier_call_chain(). + * are as for raw_notifier_call_chain(). */ int call_netdevice_notifiers(unsigned long val, void *v) { - return notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, v); +} + +/* When > 0 there are consumers of rx skb time stamps */ +static atomic_t netstamp_needed = ATOMIC_INIT(0); + +void net_enable_timestamp(void) +{ + atomic_inc(&netstamp_needed); +} + +void net_disable_timestamp(void) +{ + atomic_dec(&netstamp_needed); +} + +void __net_timestamp(struct sk_buff *skb) +{ + struct timeval tv; + + do_gettimeofday(&tv); + skb_set_timestamp(skb, &tv); +} +EXPORT_SYMBOL(__net_timestamp); + +static inline void net_timestamp(struct sk_buff *skb) +{ + if (atomic_read(&netstamp_needed)) + __net_timestamp(skb); + else { + skb->tstamp.off_sec = 0; + skb->tstamp.off_usec = 0; + } } /* @@ -1130,10 +1049,11 @@ int call_netdevice_notifiers(unsigned long val, void *v) * taps currently in use. 
*/ -void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - net_timestamp(&skb->stamp); + + net_timestamp(skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1164,36 +1084,161 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); } -/* Calculate csum in the case, when packet is misrouted. - * If it failed by some reason, ignore and send skb with wrong - * checksum. + +void __netif_schedule(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { + unsigned long flags; + struct softnet_data *sd; + + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + dev->next_sched = sd->output_queue; + sd->output_queue = dev; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__netif_schedule); + +void __netif_rx_schedule(struct net_device *dev) +{ + unsigned long flags; + + local_irq_save(flags); + dev_hold(dev); + list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); + if (dev->quota < 0) + dev->quota += dev->weight; + else + dev->quota = dev->weight; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__netif_rx_schedule); + +void dev_kfree_skb_any(struct sk_buff *skb) +{ + if (in_irq() || irqs_disabled()) + dev_kfree_skb_irq(skb); + else + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(dev_kfree_skb_any); + + +/* Hot-plugging. */ +void netif_device_detach(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_stop_queue(dev); + } +} +EXPORT_SYMBOL(netif_device_detach); + +void netif_device_attach(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_wake_queue(dev); + __netdev_watchdog_up(dev); + } +} +EXPORT_SYMBOL(netif_device_attach); + + +/* + * Invalidate hardware checksum when packet is to be mangled, and + * complete checksum manually on outgoing path. */ -struct sk_buff *skb_checksum_help(struct sk_buff *skb) +int skb_checksum_help(struct sk_buff *skb, int inward) { unsigned int csum; - int offset = skb->h.raw - skb->data; + int ret = 0, offset = skb->h.raw - skb->data; + + if (inward) { + skb->ip_summed = CHECKSUM_NONE; + goto out; + } + + if (skb_cloned(skb)) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (ret) + goto out; + } - if (offset > (int)skb->len) - BUG(); + BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); offset = skb->tail - skb->h.raw; - if (offset <= 0) - BUG(); - if (skb->csum + 2 > offset) - BUG(); + BUG_ON(offset <= 0); + BUG_ON(skb->csum + 2 > offset); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; - return skb; +out: + return ret; } +/** + * skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. 
+ */ +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_type *ptype; + int type = skb->protocol; + + BUG_ON(skb_shinfo(skb)->frag_list); + BUG_ON(skb->ip_summed != CHECKSUM_HW); + + skb->mac.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->data; + __skb_pull(skb, skb->mac_len); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + segs = ptype->gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb->mac.raw); + + return segs; +} + +EXPORT_SYMBOL(skb_gso_segment); + +/* Take action when hardware reception checksum errors are detected. */ +#ifdef CONFIG_BUG +void netdev_rx_csum_fault(struct net_device *dev) +{ + if (net_ratelimit()) { + printk(KERN_ERR "%s: hw csum failure.\n", + dev ? dev->name : ""); + dump_stack(); + } +} +EXPORT_SYMBOL(netdev_rx_csum_fault); +#endif + #ifdef CONFIG_HIGHMEM /* Actually, we should eliminate this check as soon as we know, that: * 1. IOMMU is present and allows to map all the memory. @@ -1208,7 +1253,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (skb_shinfo(skb)->frags[i].page >= highmem_start_page) + if (PageHighMem(skb_shinfo(skb)->frags[i].page)) return 1; return 0; @@ -1217,65 +1262,147 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -extern void skb_release_data(struct sk_buff *); +struct dev_gso_cb { + void (*destructor)(struct sk_buff *skb); +}; -/* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, int gfp_mask) +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) + +static void dev_gso_skb_destructor(struct sk_buff *skb) { - unsigned int size; - u8 *data; - long offset; - struct skb_shared_info *ninfo; - int headerlen = skb->data - skb->head; - int expand = (skb->tail + skb->data_len) - skb->end; + struct dev_gso_cb *cb; - if (skb_shared(skb)) - BUG(); + do { + struct sk_buff *nskb = skb->next; - if (expand <= 0) - expand = 0; + skb->next = nskb->next; + nskb->next = NULL; + kfree_skb(nskb); + } while (skb->next); - size = skb->end - skb->head + expand; - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - return -ENOMEM; + cb = DEV_GSO_CB(skb); + if (cb->destructor) + cb->destructor(skb); +} - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) - BUG(); +/** + * dev_gso_segment - Perform emulated hardware segmentation on skb. + * @skb: buffer to segment + * + * This function segments the given skb and stores the list of segments + * in skb->next. + */ +static int dev_gso_segment(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct sk_buff *segs; + int features = dev->features & ~(illegal_highdma(dev, skb) ? + NETIF_F_SG : 0); - /* Set up shinfo */ - ninfo = (struct skb_shared_info*)(data + size); - atomic_set(&ninfo->dataref, 1); - ninfo->tso_size = skb_shinfo(skb)->tso_size; - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; + segs = skb_gso_segment(skb, features); - /* Offset between the two in bytes */ - offset = data - skb->head; + /* Verifying header integrity only. */ + if (!segs) + return 0; - /* Free old data. 
*/ - skb_release_data(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); - skb->head = data; - skb->end = data + size; + skb->next = segs; + DEV_GSO_CB(skb)->destructor = skb->destructor; + skb->destructor = dev_gso_skb_destructor; - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; + return 0; +} + +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + if (likely(!skb->next)) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); + + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } + + return dev->hard_start_xmit(skb, dev); + } + +gso: + do { + struct sk_buff *nskb = skb->next; + int rc; + + skb->next = nskb->next; + nskb->next = NULL; + rc = dev->hard_start_xmit(nskb, dev); + if (unlikely(rc)) { + nskb->next = skb->next; + skb->next = nskb; + return rc; + } + if (unlikely(netif_queue_stopped(dev) && skb->next)) + return NETDEV_TX_BUSY; + } while (skb->next); + + skb->destructor = DEV_GSO_CB(skb)->destructor; - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; +out_kfree_skb: + kfree_skb(skb); + return 0; +} + +#define HARD_TX_LOCK(dev, cpu) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ + netif_tx_lock(dev); \ + } \ +} + +#define HARD_TX_UNLOCK(dev) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ + netif_tx_unlock(dev); \ + } \ +} - skb->tail += skb->data_len; - skb->data_len = 0; +#ifdef CONFIG_XEN +inline int skb_checksum_setup(struct sk_buff *skb) +{ + if (skb->proto_csum_blank) { + if (skb->protocol != htons(ETH_P_IP)) + goto out; + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; + if (skb->h.raw >= skb->tail) + goto out; + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: + skb->csum = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + skb->csum = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", skb->nh.iph->protocol); + goto out; + } + if ((skb->h.raw + skb->csum + 2) > skb->tail) + goto out; + skb->ip_summed = CHECKSUM_HW; + skb->proto_csum_blank = 0; + } return 0; +out: + return -EPROTO; } +#else +inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } +#endif + /** * dev_queue_xmit - transmit a buffer @@ -1288,6 +1415,19 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask) * A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. + * + * ----------------------------------------------------------------------------------- + * I notice this method can also return errors from the queue disciplines, + * including NET_XMIT_DROP, which is a positive value. So, errors can also + * be positive. + * + * Regardless of the return value, the skb is consumed, so it is currently + * difficult to retry a send to this method. (You can bump the ref count + * before sending to hold a reference for retry if you are careful.) + * + * When calling this method, interrupts MUST be enabled. This is because + * the BH enable code must have IRQs enabled so that it will not deadlock. 
+ * --BLG */ int dev_queue_xmit(struct sk_buff *skb) @@ -1296,9 +1436,19 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + /* If a checksum-deferred packet is forwarded to a device that needs a + * checksum, correct the pointers and force checksumming. + */ + if (skb_checksum_setup(skb)) + goto out_kfree_skb; + + /* GSO will handle the following emulations directly. */ + if (netif_needs_gso(dev, skb)) + goto gso; + if (skb_shinfo(skb)->frag_list && !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, @@ -1307,29 +1457,52 @@ int dev_queue_xmit(struct sk_buff *skb) */ if (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == CHECKSUM_HW && - (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && + (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP)))) { - if ((skb = skb_checksum_help(skb)) == NULL) - goto out; - } + skb->protocol != htons(ETH_P_IP)))) + if (skb_checksum_help(skb, 0)) + goto out_kfree_skb; + +gso: + spin_lock_prefetch(&dev->queue_lock); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ + rcu_read_lock_bh(); + + /* Updates of qdisc are serialized by queue_lock. + * The struct Qdisc which is pointed to by qdisc is now a + * rcu structure - it may be accessed without acquiring + * a lock (but the structure may be stale.) The freeing of the + * qdisc will be deferred until it's known that there are no + * more references to it. + * + * If the qdisc has an enqueue function, we still need to + * hold the queue_lock before calling it, since queue_lock + * also serializes access to the device queue. + */ - /* Grab device queue */ - spin_lock_bh(&dev->queue_lock); - q = dev->qdisc; + q = rcu_dereference(dev->qdisc); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); +#endif if (q->enqueue) { + /* Grab device queue */ + spin_lock(&dev->queue_lock); + rc = q->enqueue(skb, q); qdisc_run(dev); - spin_unlock_bh(&dev->queue_lock); + spin_unlock(&dev->queue_lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } @@ -1337,8 +1510,8 @@ int dev_queue_xmit(struct sk_buff *skb) /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... - Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. loopback and IP tunnels are clean ignoring statistics + Really, it is unlikely that netif_tx_lock protection is necessary + here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. @@ -1347,181 +1520,53 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); /* ok because BHs are off */ if (dev->xmit_lock_owner != cpu) { - /* - * The spin_lock effectivly does a preempt lock, but - * we are about to drop that... 
- */ - preempt_disable(); - spin_unlock(&dev->queue_lock); - spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = cpu; - preempt_enable(); - if (!netif_queue_stopped(dev)) { - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); + HARD_TX_LOCK(dev, cpu); + if (!netif_queue_stopped(dev)) { rc = 0; - if (!dev->hard_start_xmit(skb, dev)) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); + if (!dev_hard_start_xmit(skb, dev)) { + HARD_TX_UNLOCK(dev); goto out; - } - } - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - if (net_ratelimit()) - printk(KERN_CRIT "Virtual device %s asks to " - "queue packet!\n", dev->name); - goto out_enetdown; - } else { - /* Recursion is detected! It is possible, - * unfortunately */ - if (net_ratelimit()) - printk(KERN_CRIT "Dead loop on virtual device " - "%s, fix it urgently!\n", dev->name); - } - } - spin_unlock_bh(&dev->queue_lock); -out_enetdown: - rc = -ENETDOWN; -out_kfree_skb: - kfree_skb(skb); -out: - return rc; -} - - -/*======================================================================= - Receiver routines - =======================================================================*/ - -int netdev_max_backlog = 300; -int weight_p = 64; /* old backlog weight */ -/* These numbers are selected based on intuition and some - * experimentatiom, if you have more scientific way of doing this - * please go ahead and fix things. - */ -int no_cong_thresh = 10; -int no_cong = 20; -int lo_cong = 100; -int mod_cong = 290; - -DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; - - -#ifdef CONFIG_NET_HW_FLOWCONTROL -atomic_t netdev_dropping = ATOMIC_INIT(0); -static unsigned long netdev_fc_mask = 1; -unsigned long netdev_fc_xoff; -spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; - -static struct -{ - void (*stimul)(struct net_device *); - struct net_device *dev; -} netdev_fc_slots[BITS_PER_LONG]; - -int netdev_register_fc(struct net_device *dev, - void (*stimul)(struct net_device *dev)) -{ - int bit = 0; - unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (netdev_fc_mask != ~0UL) { - bit = ffz(netdev_fc_mask); - netdev_fc_slots[bit].stimul = stimul; - netdev_fc_slots[bit].dev = dev; - set_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); - } - spin_unlock_irqrestore(&netdev_fc_lock, flags); - return bit; -} - -void netdev_unregister_fc(int bit) -{ - unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (bit > 0) { - netdev_fc_slots[bit].stimul = NULL; - netdev_fc_slots[bit].dev = NULL; - clear_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); + } + } + HARD_TX_UNLOCK(dev); + if (net_ratelimit()) + printk(KERN_CRIT "Virtual device %s asks to " + "queue packet!\n", dev->name); + } else { + /* Recursion is detected! 
It is possible, + * unfortunately */ + if (net_ratelimit()) + printk(KERN_CRIT "Dead loop on virtual device " + "%s, fix it urgently!\n", dev->name); + } } - spin_unlock_irqrestore(&netdev_fc_lock, flags); -} -static void netdev_wakeup(void) -{ - unsigned long xoff; + rc = -ENETDOWN; + rcu_read_unlock_bh(); - spin_lock(&netdev_fc_lock); - xoff = netdev_fc_xoff; - netdev_fc_xoff = 0; - while (xoff) { - int i = ffz(~xoff); - xoff &= ~(1 << i); - netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); - } - spin_unlock(&netdev_fc_lock); +out_kfree_skb: + kfree_skb(skb); + return rc; +out: + rcu_read_unlock_bh(); + return rc; } -#endif -static void get_sample_stats(int cpu) -{ -#ifdef RAND_LIE - unsigned long rd; - int rq; -#endif - struct softnet_data *sd = &per_cpu(softnet_data, cpu); - int blog = sd->input_pkt_queue.qlen; - int avg_blog = sd->avg_blog; - - avg_blog = (avg_blog >> 1) + (blog >> 1); - - if (avg_blog > mod_cong) { - /* Above moderate congestion levels. */ - sd->cng_level = NET_RX_CN_HIGH; -#ifdef RAND_LIE - rd = net_random(); - rq = rd % netdev_max_backlog; - if (rq < avg_blog) /* unlucky bastard */ - sd->cng_level = NET_RX_DROP; -#endif - } else if (avg_blog > lo_cong) { - sd->cng_level = NET_RX_CN_MOD; -#ifdef RAND_LIE - rd = net_random(); - rq = rd % netdev_max_backlog; - if (rq < avg_blog) /* unlucky bastard */ - sd->cng_level = NET_RX_CN_HIGH; -#endif - } else if (avg_blog > no_cong) - sd->cng_level = NET_RX_CN_LOW; - else /* no congestion */ - sd->cng_level = NET_RX_SUCCESS; - sd->avg_blog = avg_blog; -} +/*======================================================================= + Receiver routines + =======================================================================*/ -#ifdef OFFLINE_SAMPLE -static void sample_queue(unsigned long dummy) -{ -/* 10 ms 0r 1ms -- i don't care -- JHS */ - int next_tick = 1; - int cpu = smp_processor_id(); +int netdev_max_backlog = 1000; +int netdev_budget = 300; +int weight_p = 64; /* old backlog weight */ - get_sample_stats(cpu); - next_tick += jiffies; - mod_timer(&samp_timer, next_tick); -} -#endif +DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; /** @@ -1544,65 +1589,37 @@ static void sample_queue(unsigned long dummy) int netif_rx(struct sk_buff *skb) { - int this_cpu; struct softnet_data *queue; unsigned long flags; -#ifdef CONFIG_NETPOLL_RX - if (skb->dev->netpoll_rx && netpoll_rx(skb)) { - kfree_skb(skb); + /* if netpoll wants it, pretend we never saw it */ + if (netpoll_rx(skb)) return NET_RX_DROP; - } -#endif - - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + + if (!skb->tstamp.off_sec) + net_timestamp(skb); /* * The code is rearranged so that the path is the most * short when CPU is congested, but is still operating. 
*/ local_irq_save(flags); - this_cpu = smp_processor_id(); queue = &__get_cpu_var(softnet_data); __get_cpu_var(netdev_rx_stat).total++; if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { - if (queue->throttle) - goto drop; - enqueue: dev_hold(skb->dev); __skb_queue_tail(&queue->input_pkt_queue, skb); -#ifndef OFFLINE_SAMPLE - get_sample_stats(this_cpu); -#endif local_irq_restore(flags); - return queue->cng_level; - } - - if (queue->throttle) { - queue->throttle = 0; -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (atomic_dec_and_test(&netdev_dropping)) - netdev_wakeup(); -#endif + return NET_RX_SUCCESS; } netif_rx_schedule(&queue->backlog_dev); goto enqueue; } - if (!queue->throttle) { - queue->throttle = 1; - __get_cpu_var(netdev_rx_stat).throttled++; -#ifdef CONFIG_NET_HW_FLOWCONTROL - atomic_inc(&netdev_dropping); -#endif - } - -drop: __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); @@ -1610,14 +1627,50 @@ drop: return NET_RX_DROP; } -static __inline__ void skb_bond(struct sk_buff *skb) +int netif_rx_ni(struct sk_buff *skb) +{ + int err; + + preempt_disable(); + err = netif_rx(skb); + if (local_softirq_pending()) + do_softirq(); + preempt_enable(); + + return err; +} + +EXPORT_SYMBOL(netif_rx_ni); + +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; if (dev->master) { - skb->real_dev = skb->dev; + /* + * On bonding slaves other than the currently active + * slave, suppress duplicates except for 802.3ad + * ETH_P_SLOW and alb non-mcast/bcast. + */ + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if (dev->master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + goto keep; + } + + if (dev->master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + goto keep; + + kfree_skb(skb); + return NULL; + } +keep: skb->dev = dev->master; } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1667,86 +1720,160 @@ static void net_tx_action(struct softirq_action *h) } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, int last) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } - #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -int (*br_handle_frame_hook)(struct sk_buff *skb); +int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); +struct net_bridge; +struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, + unsigned char *addr); +void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); -static __inline__ int handle_bridge(struct sk_buff *skb, - struct packet_type *pt_prev) +static __inline__ int handle_bridge(struct sk_buff **pskb, + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { - int ret = NET_RX_DROP; - if (pt_prev) - ret = deliver_skb(skb, pt_prev, 0); + struct net_bridge_port *port; - return ret; -} + if ((*pskb)->pkt_type == PACKET_LOOPBACK || + (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) + return 0; + if (*pt_prev) { + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + return br_handle_frame_hook(port, pskb); +} +#else +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif -static inline int __handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret) +#ifdef 
CONFIG_NET_CLS_ACT +/* TODO: Maybe we should just force sch_ingress to be compiled in + * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions + * a compare and 2 stores extra right now if we dont have it on + * but have CONFIG_NET_CLS_ACT + * NOTE: This doesnt stop any functionality; if you dont have + * the ingress scheduler, you just cant add policies on ingress. + * + */ +static int ing_filter(struct sk_buff *skb) { -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - if (skb->dev->br_port && skb->pkt_type != PACKET_LOOPBACK) { - *ret = handle_bridge(skb, *pt_prev); - if (br_handle_frame_hook(skb) == 0) - return 1; + struct Qdisc *q; + struct net_device *dev = skb->dev; + int result = TC_ACT_OK; + + if (dev->qdisc_ingress) { + __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); + if (MAX_RED_LOOP < ttl++) { + printk("Redir loop detected Dropping packet (%s->%s)\n", + skb->input_dev->name, skb->dev->name); + return TC_ACT_SHOT; + } + + skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); + + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); + + spin_lock(&dev->ingress_lock); + if ((q = dev->qdisc_ingress) != NULL) + result = q->enqueue(skb, q); + spin_unlock(&dev->ingress_lock); - *pt_prev = NULL; } -#endif - return 0; + + return result; } +#endif int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; -#ifdef CONFIG_NETPOLL_RX - if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) { - kfree_skb(skb); + /* if we've gotten here through NAPI, check netpoll */ + if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - } -#endif - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); - skb_bond(skb); + if (!skb->input_dev) + skb->input_dev = skb->dev; - __get_cpu_var(netdev_rx_stat).total++; + orig_dev = skb_bond(skb); -#ifdef CONFIG_NET_FASTROUTE - if (skb->pkt_type == PACKET_FASTROUTE) { - __get_cpu_var(netdev_rx_stat).fastroute_deferred_out++; - return dev_queue_xmit(skb); - } -#endif + if (!orig_dev) + return NET_RX_DROP; + + __get_cpu_var(netdev_rx_stat).total++; skb->h.raw = skb->nh.raw = skb->data; skb->mac_len = skb->nh.raw - skb->mac.raw; pt_prev = NULL; + rcu_read_lock(); + +#ifdef CONFIG_NET_CLS_ACT + if (skb->tc_verd & TC_NCLS) { + skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); + goto ncls; + } +#endif + +#ifdef CONFIG_XEN + switch (skb->ip_summed) { + case CHECKSUM_UNNECESSARY: + skb->proto_data_valid = 1; + break; + case CHECKSUM_HW: + /* XXX Implement me. 
*/ + default: + skb->proto_data_valid = 0; + break; + } +#endif + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev, 0); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } +#ifdef CONFIG_NET_CLS_ACT + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; /* noone else should process this after*/ + } else { + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + } + + ret = ing_filter(skb); + + if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { + kfree_skb(skb); + goto out; + } + + skb->tc_verd = 0; +ncls: +#endif + handle_diverter(skb); - if (__handle_bridge(skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1754,13 +1881,13 @@ int netif_receive_skb(struct sk_buff *skb) if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev, 0); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -1781,6 +1908,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; + backlog_dev->weight = weight_p; for (;;) { struct sk_buff *skb; struct net_device *dev; @@ -1802,16 +1930,6 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) if (work >= quota || jiffies - start_time > 1) break; -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (queue->throttle && - queue->input_pkt_queue.qlen < no_cong_thresh ) { - queue->throttle = 0; - if (atomic_dec_and_test(&netdev_dropping)) { - netdev_wakeup(); - break; - } - } -#endif } backlog_dev->quota -= work; @@ -1826,13 +1944,6 @@ job_done: smp_mb__before_clear_bit(); netif_poll_enable(backlog_dev); - if (queue->throttle) { - queue->throttle = 0; -#ifdef CONFIG_NET_HW_FLOWCONTROL - if (atomic_dec_and_test(&netdev_dropping)) - netdev_wakeup(); -#endif - } local_irq_enable(); return 0; } @@ -1841,9 +1952,9 @@ static void net_rx_action(struct softirq_action *h) { struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - int budget = netdev_max_backlog; + int budget = netdev_budget; + void *have; - local_irq_disable(); while (!list_empty(&queue->poll_list)) { @@ -1856,16 +1967,18 @@ static void net_rx_action(struct softirq_action *h) dev = list_entry(queue->poll_list.next, struct net_device, poll_list); + have = netpoll_poll_lock(dev); if (dev->quota <= 0 || dev->poll(dev, &budget)) { + netpoll_poll_unlock(have); local_irq_disable(); - list_del(&dev->poll_list); - list_add_tail(&dev->poll_list, &queue->poll_list); + list_move_tail(&dev->poll_list, &queue->poll_list); if (dev->quota < 0) dev->quota += dev->weight; else dev->quota = dev->weight; } else { + netpoll_poll_unlock(have); dev_put(dev); local_irq_disable(); } @@ -1948,7 +2061,7 @@ static int dev_ifconf(char __user *arg) { struct ifconf ifc; struct net_device *dev; - char *pos; + char __user *pos; int len; int total; int i; @@ -1969,6 +2082,9 @@ static int dev_ifconf(char __user *arg) total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (vx_flags(VXF_HIDE_NETIF, 0) && + !dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2029,6 
+2145,10 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info *nxi = current->nx_info; + + if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi)) + return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); @@ -2103,15 +2223,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v) struct netif_rx_stats *s = v; seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - s->total, s->dropped, s->time_squeeze, s->throttled, - s->fastroute_hit, s->fastroute_success, s->fastroute_defer, - s->fastroute_deferred_out, -#if 0 - s->fastroute_latency_reduction -#else - s->cpu_collision -#endif - ); + s->total, s->dropped, s->time_squeeze, 0, + 0, 0, 0, 0, /* was fastroute */ + s->cpu_collision ); return 0; } @@ -2155,7 +2269,7 @@ static struct file_operations softnet_seq_fops = { .release = seq_release, }; -#ifdef WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT extern int wireless_proc_init(void); #else #define wireless_proc_init() 0 @@ -2229,7 +2343,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) * @dev: device * @inc: modifier * - * Add or remove promsicuity from a device. While the count in the device + * Add or remove promiscuity from a device. While the count in the device * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. @@ -2238,21 +2352,21 @@ void dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; - dev->flags |= IFF_PROMISC; if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; - if (dev->flags ^ old_flags) { -#ifdef CONFIG_NET_FASTROUTE - if (dev->flags & IFF_PROMISC) { - netdev_fastroute_obstacles++; - dev_clear_fastroute(dev); - } else - netdev_fastroute_obstacles--; -#endif + else + dev->flags |= IFF_PROMISC; + if (dev->flags != old_flags) { dev_mc_upload(dev); printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); + audit_log(current->audit_context, GFP_ATOMIC, + AUDIT_ANOM_PROMISCUOUS, + "dev=%s prom=%d old_prom=%d auid=%u", + dev->name, (dev->flags & IFF_PROMISC), + (old_flags & IFF_PROMISC), + audit_get_loginuid(current->audit_context)); } } @@ -2285,12 +2399,20 @@ unsigned dev_get_flags(const struct net_device *dev) flags = (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI | - IFF_RUNNING)) | + IFF_RUNNING | + IFF_LOWER_UP | + IFF_DORMANT)) | (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); - if (netif_running(dev) && netif_carrier_ok(dev)) - flags |= IFF_RUNNING; + if (netif_running(dev)) { + if (netif_oper_up(dev)) + flags |= IFF_RUNNING; + if (netif_carrier_ok(dev)) + flags |= IFF_LOWER_UP; + if (netif_dormant(dev)) + flags |= IFF_DORMANT; + } return flags; } @@ -2333,7 +2455,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) if (dev->flags & IFF_UP && ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + raw_notifier_call_chain(&netdev_chain, + NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 
+1 : -1; @@ -2377,11 +2500,27 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; if (!err && dev->flags & IFF_UP) - notifier_call_chain(&netdev_chain, - NETDEV_CHANGEMTU, dev); + raw_notifier_call_chain(&netdev_chain, + NETDEV_CHANGEMTU, dev); return err; } +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) +{ + int err; + + if (!dev->set_mac_address) + return -EOPNOTSUPP; + if (sa->sa_family != dev->type) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + err = dev->set_mac_address(dev, sa); + if (!err) + raw_notifier_call_chain(&netdev_chain, + NETDEV_CHANGEADDR, dev); + return err; +} /* * Perform the SIOCxIFxxx calls. @@ -2419,30 +2558,23 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return dev_set_mtu(dev, ifr->ifr_mtu); case SIOCGIFHWADDR: - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + if (!dev->addr_len) + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); + else + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); ifr->ifr_hwaddr.sa_family = dev->type; return 0; case SIOCSIFHWADDR: - if (!dev->set_mac_address) - return -EOPNOTSUPP; - if (ifr->ifr_hwaddr.sa_family != dev->type) - return -EINVAL; - if (!netif_device_present(dev)) - return -ENODEV; - err = dev->set_mac_address(dev, &ifr->ifr_hwaddr); - if (!err) - notifier_call_chain(&netdev_chain, - NETDEV_CHANGEADDR, dev); - return err; + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); return 0; @@ -2515,6 +2647,8 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) cmd == SIOCGMIIPHY || cmd == SIOCGMIIREG || cmd == SIOCSMIIREG || + cmd == SIOCBRADDIF || + cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (dev->do_ioctl) { @@ -2559,9 +2693,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) */ if (cmd == SIOCGIFCONF) { - rtnl_shlock(); + rtnl_lock(); ret = dev_ifconf((char __user *) arg); - rtnl_shunlock(); + rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) @@ -2666,11 +2800,14 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: + case SIOCBRADDIF: + case SIOCBRDELIF: if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: dev_load(ifr.ifr_name); rtnl_lock(); ret = dev_ifsioc(&ifr, cmd); @@ -2702,13 +2839,14 @@ int dev_ioctl(unsigned int cmd, void __user *arg) ret = -EFAULT; return ret; } -#ifdef WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { /* If command is `set a parameter', or * `get the encoding parameters', check if * the user has the right to do it */ - if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) { + if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE + || cmd == SIOCGIWENCODEEXT) { if (!capable(CAP_NET_ADMIN)) return -EPERM; } @@ -2717,13 +2855,13 @@ int dev_ioctl(unsigned int cmd, void __user *arg) /* Follow me in net/core/wireless.c */ ret = wireless_process_ioctl(&ifr, cmd); 
rtnl_unlock(); - if (!ret && IW_IS_GET(cmd) && + if (IW_IS_GET(cmd) && copy_to_user(arg, &ifr, sizeof(struct ifreq))) ret = -EFAULT; return ret; } -#endif /* WIRELESS_EXT */ +#endif /* CONFIG_WIRELESS_EXT */ return -EINVAL; } } @@ -2736,7 +2874,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -int dev_new_index(void) +static int dev_new_index(void) { static int ifindex; for (;;) { @@ -2750,7 +2888,7 @@ int dev_new_index(void) static int dev_boot_phase = 1; /* Delayed registration/unregisteration */ -static spinlock_t net_todo_list_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); static inline void net_set_todo(struct net_device *dev) @@ -2769,8 +2907,7 @@ static inline void net_set_todo(struct net_device *dev) * chain. 0 is returned on success. A negative errno code is returned * on a failure to set up the device, or if the name is a duplicate. * - * Callers must hold the rtnl semaphore. See the comment at the - * end of Space.c for details about the locking. You may want + * Callers must hold the rtnl semaphore. You may want * register_netdev() instead of this. * * BUGS: @@ -2787,14 +2924,16 @@ int register_netdevice(struct net_device *dev) BUG_ON(dev_boot_phase); ASSERT_RTNL(); + might_sleep(); + /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); + spin_lock_init(&dev->_xmit_lock); dev->xmit_lock_owner = -1; -#ifdef CONFIG_NET_FASTROUTE - dev->fastpath_lock = RW_LOCK_UNLOCKED; +#ifdef CONFIG_NET_CLS_ACT + spin_lock_init(&dev->ingress_lock); #endif ret = alloc_divert_blk(dev); @@ -2835,14 +2974,34 @@ int register_netdevice(struct net_device *dev) /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && - !(dev->features & (NETIF_F_IP_CSUM | - NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM))) { + !(dev->features & NETIF_F_ALL_CSUM)) { printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", dev->name); dev->features &= ~NETIF_F_SG; } + /* TSO requires that SG is present as well. */ + if ((dev->features & NETIF_F_TSO) && + !(dev->features & NETIF_F_SG)) { + printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", + dev->name); + dev->features &= ~NETIF_F_TSO; + } + if (dev->features & NETIF_F_UFO) { + if (!(dev->features & NETIF_F_HW_CSUM)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_HW_CSUM feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + if (!(dev->features & NETIF_F_SG)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_SG feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + } + /* * nil rebuild_header routine, * that should be never called and used as just bug trap. @@ -2851,6 +3010,11 @@ int register_netdevice(struct net_device *dev) if (!dev->rebuild_header) dev->rebuild_header = default_rebuild_header; + ret = netdev_register_sysfs(dev); + if (ret) + goto out_err; + dev->reg_state = NETREG_REGISTERED; + /* * Default initial state at registry is that the * device is present. 
@@ -2866,14 +3030,11 @@ int register_netdevice(struct net_device *dev) hlist_add_head(&dev->name_hlist, head); hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - dev->reg_state = NETREG_REGISTERING; write_unlock_bh(&dev_base_lock); /* Notify protocols, that a new device appeared. */ - notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - /* Finish registration after unlock */ - net_set_todo(dev); ret = 0; out: @@ -2883,6 +3044,51 @@ out_err: goto out; } +/** + * register_netdev - register a network device + * @dev: device to register + * + * Take a completed network device structure and add it to the kernel + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier + * chain. 0 is returned on success. A negative errno code is returned + * on a failure to set up the device, or if the name is a duplicate. + * + * This is a wrapper around register_netdev that takes the rtnl semaphore + * and expands the device name if you passed a format string to + * alloc_netdev. + */ +int register_netdev(struct net_device *dev) +{ + int err; + + rtnl_lock(); + + /* + * If the name is a format string the caller wants us to do a + * name allocation. + */ + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto out; + } + + /* + * Back compatibility hook. Kill this one in 2.5 + */ + if (dev->name[0] == 0 || dev->name[0] == ' ') { + err = dev_alloc_name(dev, "eth%d"); + if (err < 0) + goto out; + } + + err = register_netdevice(dev); +out: + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL(register_netdev); + /* * netdev_wait_allrefs - wait until all references are gone. * @@ -2901,11 +3107,10 @@ static void netdev_wait_allrefs(struct net_device *dev) rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { - rtnl_shlock(); - rtnl_exlock(); + rtnl_lock(); /* Rebroadcast unregister notification */ - notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, @@ -2919,14 +3124,12 @@ static void netdev_wait_allrefs(struct net_device *dev) linkwatch_run_queue(); } - rtnl_exunlock(); - rtnl_shunlock(); + __rtnl_unlock(); rebroadcast_time = jiffies; } - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ / 4); + msleep(250); if (time_after(jiffies, warning_time + 10 * HZ)) { printk(KERN_EMERG "unregister_netdevice: " @@ -2954,24 +3157,26 @@ static void netdev_wait_allrefs(struct net_device *dev) * * We are invoked by rtnl_unlock() after it drops the semaphore. * This allows us to deal with problems: - * 1) We can create/delete sysfs objects which invoke hotplug + * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. */ -static DECLARE_MUTEX(net_todo_run_mutex); +static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list = LIST_HEAD_INIT(list); - /* Safe outside mutex since we only care about entries that - * this cpu put into queue while under RTNL. + /* Need to guard against multiple cpu's getting out of order. */ + mutex_lock(&net_todo_run_mutex); + + /* Not safe to do outside the semaphore. 
+	 * until all unregister events invoked by the local processor
+	 * have been completed (either by this todo run, or one on
+	 * another cpu).
 	 */
 	if (list_empty(&net_todo_list))
-		return;
-
-	/* Need to guard against multiple cpu's getting out of order. */
-	down(&net_todo_run_mutex);
+		goto out;
 
 	/* Snapshot list, allow later requests */
 	spin_lock(&net_todo_list_lock);
@@ -2983,41 +3188,73 @@ void netdev_run_todo(void)
 			= list_entry(list.next, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
-		switch(dev->reg_state) {
-		case NETREG_REGISTERING:
-			netdev_register_sysfs(dev);
-			dev->reg_state = NETREG_REGISTERED;
-			break;
+		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
+			printk(KERN_ERR "network todo '%s' but state %d\n",
+			       dev->name, dev->reg_state);
+			dump_stack();
+			continue;
+		}
 
-		case NETREG_UNREGISTERING:
-			netdev_unregister_sysfs(dev);
-			dev->reg_state = NETREG_UNREGISTERED;
+		netdev_unregister_sysfs(dev);
+		dev->reg_state = NETREG_UNREGISTERED;
 
-			netdev_wait_allrefs(dev);
+		netdev_wait_allrefs(dev);
 
-			/* paranoia */
-			BUG_ON(atomic_read(&dev->refcnt));
-			BUG_TRAP(!dev->ip_ptr);
-			BUG_TRAP(!dev->ip6_ptr);
-			BUG_TRAP(!dev->dn_ptr);
+		/* paranoia */
+		BUG_ON(atomic_read(&dev->refcnt));
+		BUG_TRAP(!dev->ip_ptr);
+		BUG_TRAP(!dev->ip6_ptr);
+		BUG_TRAP(!dev->dn_ptr);
 
+		/* It must be the very last action,
+		 * after this 'dev' may point to freed up memory.
+		 */
+		if (dev->destructor)
+			dev->destructor(dev);
+	}
 
-			/* It must be the very last action,
-			 * after this 'dev' may point to freed up memory.
-			 */
-			if (dev->destructor)
-				dev->destructor(dev);
-			break;
+out:
+	mutex_unlock(&net_todo_run_mutex);
+}
 
-		default:
-			printk(KERN_ERR "network todo '%s' but state %d\n",
-			       dev->name, dev->reg_state);
-			break;
-		}
+/**
+ *	alloc_netdev - allocate network device
+ *	@sizeof_priv: size of private data to allocate space for
+ *	@name: device name format string
+ *	@setup: callback to initialize device
+ *
+ *	Allocates a struct net_device with private data area for driver use
+ *	and performs basic initialization.
+ */
+struct net_device *alloc_netdev(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *))
+{
+	void *p;
+	struct net_device *dev;
+	int alloc_size;
+
+	/* ensure 32-byte alignment of both the device and private area */
+	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
+
+	p = kzalloc(alloc_size, GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		return NULL;
 	}
-	up(&net_todo_run_mutex);
+
+	dev = (struct net_device *)
+		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev->padded = (char *)dev - (char *)p;
+
+	if (sizeof_priv)
+		dev->priv = netdev_priv(dev);
+
+	setup(dev);
+	strcpy(dev->name, name);
+	return dev;
 }
+EXPORT_SYMBOL(alloc_netdev);
 
 /**
  *	free_netdev - free network device
@@ -3029,7 +3266,8 @@ void netdev_run_todo(void)
  */
 void free_netdev(struct net_device *dev)
 {
-	/* Compatiablity with error handling in drivers */
+#ifdef CONFIG_SYSFS
+	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
 		return;
 	}
@@ -3040,13 +3278,16 @@ void free_netdev(struct net_device *dev)
 
 	/* will free via class release */
 	class_device_put(&dev->class_dev);
+#else
+	kfree((char *)dev - dev->padded);
+#endif
 }
 
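alloc_netdev() carves the driver's private area out of the same allocation as the struct net_device itself: the NETDEV_ALIGN_CONST arithmetic rounds the structure size up to a 32-byte boundary, so the area returned by netdev_priv() is 32-byte aligned as well. A sketch of typical caller usage, continuing the hypothetical hypo_* driver from the earlier example (struct hypo_priv is invented purely for illustration):

    /* Hypothetical per-device private data, for the example only. */
    struct hypo_priv {
            spinlock_t lock;
            struct net_device_stats stats;
    };

    static struct net_device *hypo_create(void)
    {
            struct net_device *dev;
            struct hypo_priv *priv;

            /* "%d" asks register_netdev() to pick a free unit number. */
            dev = alloc_netdev(sizeof(struct hypo_priv), "hypo%d",
                               hypo_setup);
            if (!dev)
                    return NULL;

            priv = netdev_priv(dev);    /* 32-byte aligned private area */
            spin_lock_init(&priv->lock);
            return dev;
    }

A device created this way is released with free_netdev(), which undoes the padded allocation via dev->padded.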
 /* Synchronize with packet receive processing. */
 void synchronize_net(void)
 {
 	might_sleep();
-	synchronize_kernel();
+	synchronize_rcu();
 }
 
 /**
@@ -3057,8 +3298,7 @@ void synchronize_net(void)
  *	from the kernel tables. On success 0 is returned, on a failure
  *	a negative errno code is returned.
  *
- *	Callers must hold the rtnl semaphore.  See the comment at the
- *	end of Space.c for details about the locking.  You may want
+ *	Callers must hold the rtnl semaphore.  You may want
  *	unregister_netdev() instead of this.
  */
 
@@ -3105,10 +3345,6 @@ int unregister_netdevice(struct net_device *dev)
 
 	synchronize_net();
 
-#ifdef CONFIG_NET_FASTROUTE
-	dev_clear_fastroute(dev);
-#endif
-
 	/* Shutdown queueing discipline. */
 	dev_shutdown(dev);
 
@@ -3116,7 +3352,7 @@ int unregister_netdevice(struct net_device *dev)
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
 	*/
-	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
 
 	/*
 	 *	Flush the multicast chain
@@ -3134,10 +3370,33 @@ int unregister_netdevice(struct net_device *dev)
 
 	/* Finish processing unregister after unlock */
 	net_set_todo(dev);
 
+	synchronize_net();
+
+	dev_put(dev);
 	return 0;
 }
 
+/**
+ *	unregister_netdev - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables.
+ *
+ *	This is just a wrapper for unregister_netdevice that takes
+ *	the rtnl semaphore.  In general you want to use this and not
+ *	unregister_netdevice.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+	rtnl_lock();
+	unregister_netdevice(dev);
+	rtnl_unlock();
+}
+
+EXPORT_SYMBOL(unregister_netdev);
+
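Taken together, register_netdev() and unregister_netdev() give drivers a lifecycle that never handles the rtnl semaphore directly. A minimal module skeleton under the same hypothetical hypo_* names as above (a sketch, not part of this patch):

    #include <linux/module.h>

    static struct net_device *hypo_dev;

    static int __init hypo_init(void)
    {
            int err;

            hypo_dev = hypo_create();
            if (!hypo_dev)
                    return -ENOMEM;

            err = register_netdev(hypo_dev);    /* takes rtnl itself */
            if (err)
                    free_netdev(hypo_dev);      /* never registered */
            return err;
    }

    static void __exit hypo_exit(void)
    {
            /* Returns only after netdev_run_todo() has waited for
             * the device refcount to reach zero. */
            unregister_netdev(hypo_dev);
            free_netdev(hypo_dev);
    }

    module_init(hypo_init);
    module_exit(hypo_exit);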
 #ifdef CONFIG_HOTPLUG_CPU
 static int dev_cpu_callback(struct notifier_block *nfb,
 			    unsigned long action,
@@ -3202,6 +3461,8 @@ static int __init net_dev_init(void)
 
 	BUG_ON(!dev_boot_phase);
 
+	net_random_init();
+
 	if (dev_proc_init())
 		goto out;
 
@@ -3222,14 +3483,11 @@ static int __init net_dev_init(void)
 	 *	Initialise the packet receive queues.
 	 */
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_possible_cpu(i) {
 		struct softnet_data *queue;
 
 		queue = &per_cpu(softnet_data, i);
 		skb_queue_head_init(&queue->input_pkt_queue);
-		queue->throttle = 0;
-		queue->cng_level = 0;
-		queue->avg_blog = 10; /* arbitrary non-zero */
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
@@ -3238,11 +3496,6 @@ static int __init net_dev_init(void)
 		atomic_set(&queue->backlog_dev.refcnt, 1);
 	}
 
-#ifdef OFFLINE_SAMPLE
-	samp_timer.expires = jiffies + (10 * HZ);
-	add_timer(&samp_timer);
-#endif
-
 	dev_boot_phase = 0;
 
 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
@@ -3258,28 +3511,24 @@ out:
 
 subsys_initcall(net_dev_init);
 
-EXPORT_SYMBOL(__dev_get);
-EXPORT_SYMBOL(__dev_get_by_flags);
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
-EXPORT_SYMBOL(call_netdevice_notifiers);
+EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
 EXPORT_SYMBOL(dev_close);
 EXPORT_SYMBOL(dev_get_by_flags);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);
-EXPORT_SYMBOL(dev_getbyhwaddr);
-EXPORT_SYMBOL(dev_ioctl);
-EXPORT_SYMBOL(dev_new_index);
 EXPORT_SYMBOL(dev_open);
 EXPORT_SYMBOL(dev_queue_xmit);
-EXPORT_SYMBOL(dev_queue_xmit_nit);
 EXPORT_SYMBOL(dev_remove_pack);
 EXPORT_SYMBOL(dev_set_allmulti);
 EXPORT_SYMBOL(dev_set_promiscuity);
+EXPORT_SYMBOL(dev_change_flags);
+EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(dev_set_mac_address);
 EXPORT_SYMBOL(free_netdev);
 EXPORT_SYMBOL(netdev_boot_setup_check);
 EXPORT_SYMBOL(netdev_set_master);
@@ -3293,26 +3542,19 @@ EXPORT_SYMBOL(skb_checksum_help);
 EXPORT_SYMBOL(synchronize_net);
 EXPORT_SYMBOL(unregister_netdevice);
 EXPORT_SYMBOL(unregister_netdevice_notifier);
+EXPORT_SYMBOL(net_enable_timestamp);
+EXPORT_SYMBOL(net_disable_timestamp);
+EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(skb_checksum_setup);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
+EXPORT_SYMBOL(br_fdb_get_hook);
+EXPORT_SYMBOL(br_fdb_put_hook);
 #endif
-/* for 801q VLAN support */
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-EXPORT_SYMBOL(dev_change_flags);
-#endif
+
 #ifdef CONFIG_KMOD
 EXPORT_SYMBOL(dev_load);
 #endif
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-EXPORT_SYMBOL(netdev_dropping);
-EXPORT_SYMBOL(netdev_fc_xoff);
-EXPORT_SYMBOL(netdev_register_fc);
-EXPORT_SYMBOL(netdev_unregister_fc);
-#endif
-#ifdef CONFIG_NET_FASTROUTE
-EXPORT_SYMBOL(netdev_fastroute);
-EXPORT_SYMBOL(netdev_fastroute_obstacles);
-#endif
 
 EXPORT_PER_CPU_SYMBOL(softnet_data);
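The switch from a raw 0..NR_CPUS-1 loop to for_each_possible_cpu() in net_dev_init() is the canonical pattern for initializing per-CPU state: it visits only CPUs that can ever come online rather than every slot up to NR_CPUS. A generic sketch of the same idiom, with invented names for illustration:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, hypo_pkt_count);

    static void hypo_reset_counters(void)
    {
            int cpu;

            /* Touch possible CPUs only, not all NR_CPUS slots. */
            for_each_possible_cpu(cpu)
                    per_cpu(hypo_pkt_count, cpu) = 0;
    }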