2 * NET3 Protocol independent device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the non IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
51 * Rudi Cilibrasi : Pass the right thing to
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <linux/divert.h>
103 #include <net/pkt_sched.h>
104 #include <net/checksum.h>
105 #include <linux/highmem.h>
106 #include <linux/init.h>
107 #include <linux/kmod.h>
108 #include <linux/module.h>
109 #include <linux/kallsyms.h>
110 #include <linux/netpoll.h>
111 #include <linux/rcupdate.h>
112 #include <linux/delay.h>
113 #include <linux/wireless.h>
114 #include <net/iw_handler.h>
115 #include <asm/current.h>
116 #include <linux/audit.h>
117 #include <linux/dmaengine.h>
118 #include <linux/err.h>
119 #include <linux/ctype.h>
120 #include <linux/vs_base.h>
121 #include <linux/vs_network.h>
125 #include <linux/tcp.h>
126 #include <linux/udp.h>
130 * The list of packet types we will receive (as opposed to discard)
131 * and the routines to invoke.
133 * Why 16. Because with 16 the only overlap we get on a hash of the
134 * low nibble of the protocol value is RARP/SNAP/X.25.
136 * NOTE: That is no longer true with the addition of VLAN tags. Not
137 * sure which should go first, but I bet it won't make much
138 * difference if we are running VLANs. The good news is that
139 * this protocol won't be in the list unless compiled in, so
140 * the average user (w/out VLANs) will not be adversely affected.
157 static DEFINE_SPINLOCK(ptype_lock);
158 static struct list_head ptype_base[16]; /* 16 way hashed list */
159 static struct list_head ptype_all; /* Taps */
#ifdef CONFIG_NET_DMA
/* DMA-offload copy engine state; guarded so builds without
 * CONFIG_NET_DMA carry no dead globals. */
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif
168 * The @dev_base list is protected by @dev_base_lock and the rtnl
171 * Pure readers hold dev_base_lock for reading.
173 * Writers must hold the rtnl semaphore while they loop through the
174 * dev_base list, and hold dev_base_lock for writing when they do the
175 * actual updates. This allows pure readers to access the list even
176 * while a writer is preparing to update it.
178 * To put it another way, dev_base_lock is held for writing only to
179 * protect against pure readers; the rtnl semaphore provides the
180 * protection against other writers.
182 * See, for example usages, register_netdevice() and
183 * unregister_netdevice(), which must be called with the rtnl
186 struct net_device *dev_base;
187 static struct net_device **dev_tail = &dev_base;
188 DEFINE_RWLOCK(dev_base_lock);
190 EXPORT_SYMBOL(dev_base);
191 EXPORT_SYMBOL(dev_base_lock);
193 #define NETDEV_HASHBITS 8
194 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
195 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
197 static inline struct hlist_head *dev_name_hash(const char *name)
199 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
200 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
203 static inline struct hlist_head *dev_index_hash(int ifindex)
205 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
212 static RAW_NOTIFIER_HEAD(netdev_chain);
215 * Device drivers call our routines to queue packets here. We empty the
216 * queue in the local softnet handler.
218 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
/* Without sysfs the registration hooks collapse to no-ops that
 * still "succeed" (return 0) so callers need no #ifdefs. */
#define netdev_sysfs_init()	 	(0)
#define netdev_register_sysfs(dev)	(0)
#define	netdev_unregister_sysfs(dev)	do { } while(0)
#endif
231 /*******************************************************************************
233 Protocol management and registration routines
235 *******************************************************************************/
241 static int netdev_nit;
244 * Add a protocol ID to the list. Now that the input handler is
245 * smarter we can dispense with all the messy stuff that used to be
248 * BEWARE!!! Protocol handlers, mangling input packets,
249 * MUST BE last in hash buckets and checking protocol handlers
250 * MUST start from promiscuous ptype_all chain in net_bh.
251 * It is true now, do not change it.
252 * Explanation follows: if protocol handler, mangling packet, will
253 * be the first on list, it is not able to sense, that packet
254 * is cloned and should be copied-on-write, so that it will
255 * change it and subsequent readers will get broken packet.
260 * dev_add_pack - add packet handler
261 * @pt: packet type declaration
263 * Add a protocol handler to the networking stack. The passed &packet_type
264 * is linked into kernel lists and may not be freed until it has been
265 * removed from the kernel lists.
267 * This call does not sleep therefore it can not
268 * guarantee all CPU's that are in middle of receiving packets
269 * will see the new packet type (until the next received packet).
272 void dev_add_pack(struct packet_type *pt)
276 spin_lock_bh(&ptype_lock);
277 if (pt->type == htons(ETH_P_ALL)) {
279 list_add_rcu(&pt->list, &ptype_all);
281 hash = ntohs(pt->type) & 15;
282 list_add_rcu(&pt->list, &ptype_base[hash]);
284 spin_unlock_bh(&ptype_lock);
288 * __dev_remove_pack - remove packet handler
289 * @pt: packet type declaration
291 * Remove a protocol handler that was previously added to the kernel
292 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
293 * from the kernel lists and can be freed or reused once this function
296 * The packet type might still be in use by receivers
297 * and must not be freed until after all the CPU's have gone
298 * through a quiescent state.
300 void __dev_remove_pack(struct packet_type *pt)
302 struct list_head *head;
303 struct packet_type *pt1;
305 spin_lock_bh(&ptype_lock);
307 if (pt->type == htons(ETH_P_ALL)) {
311 head = &ptype_base[ntohs(pt->type) & 15];
313 list_for_each_entry(pt1, head, list) {
315 list_del_rcu(&pt->list);
320 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
322 spin_unlock_bh(&ptype_lock);
325 * dev_remove_pack - remove packet handler
326 * @pt: packet type declaration
328 * Remove a protocol handler that was previously added to the kernel
329 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
330 * from the kernel lists and can be freed or reused once this function
333 * This call sleeps to guarantee that no CPU is looking at the packet
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait for all in-flight RCU readers so the caller may free pt. */
	synchronize_net();
}
343 /******************************************************************************
345 Device Boot-time Settings Routines
347 *******************************************************************************/
349 /* Boot time configuration table */
350 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
353 * netdev_boot_setup_add - add new setup entry
354 * @name: name of the device
355 * @map: configured settings for the device
357 * Adds new setup entry to the dev_boot_setup list. The function
358 * returns 0 on error and 1 on success. This is a generic routine to
361 static int netdev_boot_setup_add(char *name, struct ifmap *map)
363 struct netdev_boot_setup *s;
367 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
368 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
369 memset(s[i].name, 0, sizeof(s[i].name));
370 strcpy(s[i].name, name);
371 memcpy(&s[i].map, map, sizeof(s[i].map));
376 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
380 * netdev_boot_setup_check - check boot time settings
381 * @dev: the netdevice
383 * Check boot time settings for the device.
384 * The found settings are set for the device to be used
385 * later in the device probing.
386 * Returns 0 if no settings found, 1 if they are.
388 int netdev_boot_setup_check(struct net_device *dev)
390 struct netdev_boot_setup *s = dev_boot_setup;
393 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
394 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
395 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
396 dev->irq = s[i].map.irq;
397 dev->base_addr = s[i].map.base_addr;
398 dev->mem_start = s[i].map.mem_start;
399 dev->mem_end = s[i].map.mem_end;
408 * netdev_boot_base - get address from boot time settings
409 * @prefix: prefix for network device
410 * @unit: id for network device
412 * Check boot time settings for the base address of device.
413 * The found settings are set for the device to be used
414 * later in the device probing.
415 * Returns 0 if no settings found.
417 unsigned long netdev_boot_base(const char *prefix, int unit)
419 const struct netdev_boot_setup *s = dev_boot_setup;
423 sprintf(name, "%s%d", prefix, unit);
426 * If device already registered then return base of 1
427 * to indicate not to probe for this interface
429 if (__dev_get_by_name(name))
432 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
433 if (!strcmp(name, s[i].name))
434 return s[i].map.base_addr;
439 * Saves at boot time configured settings for any netdevice.
441 int __init netdev_boot_setup(char *str)
446 str = get_options(str, ARRAY_SIZE(ints), ints);
451 memset(&map, 0, sizeof(map));
455 map.base_addr = ints[2];
457 map.mem_start = ints[3];
459 map.mem_end = ints[4];
461 /* Add new entry to the list */
462 return netdev_boot_setup_add(str, &map);
465 __setup("netdev=", netdev_boot_setup);
467 /*******************************************************************************
469 Device Interface Subroutines
471 *******************************************************************************/
474 * __dev_get_by_name - find a device by its name
475 * @name: name to find
477 * Find an interface by name. Must be called under RTNL semaphore
478 * or @dev_base_lock. If the name is found a pointer to the device
479 * is returned. If the name is not found then %NULL is returned. The
480 * reference counters are not incremented so the caller must be
481 * careful with locks.
484 struct net_device *__dev_get_by_name(const char *name)
486 struct hlist_node *p;
488 hlist_for_each(p, dev_name_hash(name)) {
489 struct net_device *dev
490 = hlist_entry(p, struct net_device, name_hlist);
491 if (!strncmp(dev->name, name, IFNAMSIZ))
498 * dev_get_by_name - find a device by its name
499 * @name: name to find
501 * Find an interface by name. This can be called from any
502 * context and does its own locking. The returned handle has
503 * the usage count incremented and the caller must use dev_put() to
504 * release it when it is no longer needed. %NULL is returned if no
505 * matching device is found.
508 struct net_device *dev_get_by_name(const char *name)
510 struct net_device *dev;
512 read_lock(&dev_base_lock);
513 dev = __dev_get_by_name(name);
516 read_unlock(&dev_base_lock);
521 * __dev_get_by_index - find a device by its ifindex
522 * @ifindex: index of device
524 * Search for an interface by index. Returns %NULL if the device
525 * is not found or a pointer to the device. The device has not
526 * had its reference counter increased so the caller must be careful
527 * about locking. The caller must hold either the RTNL semaphore
531 struct net_device *__dev_get_by_index(int ifindex)
533 struct hlist_node *p;
535 hlist_for_each(p, dev_index_hash(ifindex)) {
536 struct net_device *dev
537 = hlist_entry(p, struct net_device, index_hlist);
538 if (dev->ifindex == ifindex)
546 * dev_get_by_index - find a device by its ifindex
547 * @ifindex: index of device
549 * Search for an interface by index. Returns NULL if the device
550 * is not found or a pointer to the device. The device returned has
551 * had a reference added and the pointer is safe until the user calls
552 * dev_put to indicate they have finished with it.
555 struct net_device *dev_get_by_index(int ifindex)
557 struct net_device *dev;
559 read_lock(&dev_base_lock);
560 dev = __dev_get_by_index(ifindex);
563 read_unlock(&dev_base_lock);
568 * dev_getbyhwaddr - find a device by its hardware address
569 * @type: media type of device
570 * @ha: hardware address
572 * Search for an interface by MAC address. Returns NULL if the device
573 * is not found or a pointer to the device. The caller must hold the
574 * rtnl semaphore. The returned device has not had its ref count increased
575 * and the caller must therefore be careful about locking
578 * If the API was consistent this would be __dev_get_by_hwaddr
581 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
583 struct net_device *dev;
587 for (dev = dev_base; dev; dev = dev->next)
588 if (dev->type == type &&
589 !memcmp(dev->dev_addr, ha, dev->addr_len))
594 EXPORT_SYMBOL(dev_getbyhwaddr);
596 struct net_device *dev_getfirstbyhwtype(unsigned short type)
598 struct net_device *dev;
601 for (dev = dev_base; dev; dev = dev->next) {
602 if (dev->type == type) {
611 EXPORT_SYMBOL(dev_getfirstbyhwtype);
614 * dev_get_by_flags - find any device with given flags
615 * @if_flags: IFF_* values
616 * @mask: bitmask of bits in if_flags to check
618 * Search for any interface with the given flags. Returns NULL if a device
619 * is not found or a pointer to the device. The device returned has
620 * had a reference added and the pointer is safe until the user calls
621 * dev_put to indicate they have finished with it.
624 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
626 struct net_device *dev;
628 read_lock(&dev_base_lock);
629 for (dev = dev_base; dev != NULL; dev = dev->next) {
630 if (((dev->flags ^ if_flags) & mask) == 0) {
635 read_unlock(&dev_base_lock);
640 * dev_valid_name - check if name is okay for network device
643 * Network device names need to be valid file names to
644 * to allow sysfs to work. We also disallow any kind of
647 int dev_valid_name(const char *name)
651 if (!strcmp(name, ".") || !strcmp(name, ".."))
655 if (*name == '/' || isspace(*name))
663 * dev_alloc_name - allocate a name for a device
665 * @name: name format string
667 * Passed a format string - eg "lt%d" it will try and find a suitable
668 * id. It scans list of devices to build up a free map, then chooses
669 * the first empty slot. The caller must hold the dev_base or rtnl lock
670 * while allocating the name and adding the device in order to avoid
672 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
673 * Returns the number of the unit assigned or a negative errno code.
676 int dev_alloc_name(struct net_device *dev, const char *name)
681 const int max_netdevices = 8*PAGE_SIZE;
683 struct net_device *d;
685 p = strnchr(name, IFNAMSIZ-1, '%');
688 * Verify the string as this thing may have come from
689 * the user. There must be either one "%d" and no other "%"
692 if (p[1] != 'd' || strchr(p + 2, '%'))
695 /* Use one page as a bit array of possible slots */
696 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
700 for (d = dev_base; d; d = d->next) {
701 if (!sscanf(d->name, name, &i))
703 if (i < 0 || i >= max_netdevices)
706 /* avoid cases where sscanf is not exact inverse of printf */
707 snprintf(buf, sizeof(buf), name, i);
708 if (!strncmp(buf, d->name, IFNAMSIZ))
712 i = find_first_zero_bit(inuse, max_netdevices);
713 free_page((unsigned long) inuse);
716 snprintf(buf, sizeof(buf), name, i);
717 if (!__dev_get_by_name(buf)) {
718 strlcpy(dev->name, buf, IFNAMSIZ);
722 /* It is possible to run out of possible slots
723 * when the name is long and there isn't enough space left
724 * for the digits, or if all bits are used.
731 * dev_change_name - change name of a device
733 * @newname: name (or format string) must be at least IFNAMSIZ
735 * Change name of a device, can pass format strings "eth%d".
738 int dev_change_name(struct net_device *dev, char *newname)
744 if (dev->flags & IFF_UP)
747 if (!dev_valid_name(newname))
750 if (strchr(newname, '%')) {
751 err = dev_alloc_name(dev, newname);
754 strcpy(newname, dev->name);
756 else if (__dev_get_by_name(newname))
759 strlcpy(dev->name, newname, IFNAMSIZ);
761 err = class_device_rename(&dev->class_dev, dev->name);
763 hlist_del(&dev->name_hlist);
764 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
765 raw_notifier_call_chain(&netdev_chain,
766 NETDEV_CHANGENAME, dev);
773 * netdev_features_change - device changes features
774 * @dev: device to cause notification
776 * Called to indicate a device has changed features.
778 void netdev_features_change(struct net_device *dev)
780 raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
782 EXPORT_SYMBOL(netdev_features_change);
785 * netdev_state_change - device changes state
786 * @dev: device to cause notification
788 * Called to indicate a device has changed state. This function calls
789 * the notifier chains for netdev_chain and sends a NEWLINK message
790 * to the routing socket.
792 void netdev_state_change(struct net_device *dev)
794 if (dev->flags & IFF_UP) {
795 raw_notifier_call_chain(&netdev_chain,
797 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
802 * dev_load - load a network module
803 * @name: name of interface
805 * If a network interface is not present and the process has suitable
806 * privileges this function loads the module. If module loading is not
807 * available in this kernel then it becomes a nop.
810 void dev_load(const char *name)
812 struct net_device *dev;
814 read_lock(&dev_base_lock);
815 dev = __dev_get_by_name(name);
816 read_unlock(&dev_base_lock);
818 if (!dev && capable(CAP_SYS_MODULE))
819 request_module("%s", name);
822 static int default_rebuild_header(struct sk_buff *skb)
824 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
825 skb->dev ? skb->dev->name : "NULL!!!");
832 * dev_open - prepare an interface for use.
833 * @dev: device to open
835 * Takes a device from down to up state. The device's private open
836 * function is invoked and then the multicast lists are loaded. Finally
837 * the device is moved into the up state and a %NETDEV_UP message is
838 * sent to the netdev notifier chain.
840 * Calling this function on an active interface is a nop. On a failure
841 * a negative errno code is returned.
843 int dev_open(struct net_device *dev)
851 if (dev->flags & IFF_UP)
855 * Is it even present?
857 if (!netif_device_present(dev))
861 * Call device private open method
863 set_bit(__LINK_STATE_START, &dev->state);
865 ret = dev->open(dev);
867 clear_bit(__LINK_STATE_START, &dev->state);
871 * If it went open OK then:
878 dev->flags |= IFF_UP;
881 * Initialize multicasting status
886 * Wakeup transmit queue engine
891 * ... and announce new interface.
893 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
899 * dev_close - shutdown an interface.
900 * @dev: device to shutdown
902 * This function moves an active device into down state. A
903 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
904 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
907 int dev_close(struct net_device *dev)
909 if (!(dev->flags & IFF_UP))
913 * Tell people we are going down, so that they can
914 * prepare to death, when device is still operating.
916 raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
920 clear_bit(__LINK_STATE_START, &dev->state);
922 /* Synchronize to scheduled poll. We cannot touch poll list,
923 * it can be even on different cpu. So just clear netif_running(),
924 * and wait when poll really will happen. Actually, the best place
925 * for this is inside dev->stop() after device stopped its irq
926 * engine, but this requires more changes in devices. */
928 smp_mb__after_clear_bit(); /* Commit netif_running(). */
929 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
935 * Call the device specific close. This cannot fail.
936 * Only if device is UP
938 * We allow it to be called even after a DETACH hot-plug
945 * Device is now down.
948 dev->flags &= ~IFF_UP;
951 * Tell people we are down
953 raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
960 * Device change register/unregister. These are not inline or static
961 * as we export them to the world.
965 * register_netdevice_notifier - register a network notifier block
968 * Register a notifier to be called when network device events occur.
969 * The notifier passed is linked into the kernel structures and must
970 * not be reused until it has been unregistered. A negative errno code
971 * is returned on a failure.
973 * When registered all registration and up events are replayed
974 * to the new notifier to allow device to have a race free
975 * view of the network device list.
978 int register_netdevice_notifier(struct notifier_block *nb)
980 struct net_device *dev;
984 err = raw_notifier_chain_register(&netdev_chain, nb);
986 for (dev = dev_base; dev; dev = dev->next) {
987 nb->notifier_call(nb, NETDEV_REGISTER, dev);
989 if (dev->flags & IFF_UP)
990 nb->notifier_call(nb, NETDEV_UP, dev);
998 * unregister_netdevice_notifier - unregister a network notifier block
1001 * Unregister a notifier previously registered by
1002 * register_netdevice_notifier(). The notifier is unlinked into the
1003 * kernel structures and may then be reused. A negative errno code
1004 * is returned on a failure.
1007 int unregister_netdevice_notifier(struct notifier_block *nb)
1012 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1018 * call_netdevice_notifiers - call all network notifier blocks
1019 * @val: value passed unmodified to notifier function
1020 * @v: pointer passed unmodified to notifier function
1022 * Call all network notifier blocks. Parameters and return value
1023 * are as for raw_notifier_call_chain().
1026 int call_netdevice_notifiers(unsigned long val, void *v)
1028 return raw_notifier_call_chain(&netdev_chain, val, v);
1031 /* When > 0 there are consumers of rx skb time stamps */
1032 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1034 void net_enable_timestamp(void)
1036 atomic_inc(&netstamp_needed);
1039 void net_disable_timestamp(void)
1041 atomic_dec(&netstamp_needed);
1044 void __net_timestamp(struct sk_buff *skb)
1048 do_gettimeofday(&tv);
1049 skb_set_timestamp(skb, &tv);
1051 EXPORT_SYMBOL(__net_timestamp);
1053 static inline void net_timestamp(struct sk_buff *skb)
1055 if (atomic_read(&netstamp_needed))
1056 __net_timestamp(skb);
1058 skb->tstamp.off_sec = 0;
1059 skb->tstamp.off_usec = 0;
1064 * Support routine. Sends outgoing frames to any network
1065 * taps currently in use.
1068 #if !((defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)))
1071 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1073 struct packet_type *ptype;
1078 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1079 /* Never send packets back to the socket
1080 * they originated from - MvS (miquels@drinkel.ow.org)
1082 if ((ptype->dev == dev || !ptype->dev) &&
1083 (ptype->af_packet_priv == NULL ||
1084 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1085 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1089 /* skb->nh should be correctly
1090 set by sender, so that the second statement is
1091 just protection against buggy protocols.
1093 skb2->mac.raw = skb2->data;
1095 if (skb2->nh.raw < skb2->data ||
1096 skb2->nh.raw > skb2->tail) {
1097 if (net_ratelimit())
1098 printk(KERN_CRIT "protocol %04x is "
1100 skb2->protocol, dev->name);
1101 skb2->nh.raw = skb2->data;
1104 skb2->h.raw = skb2->nh.raw;
1105 skb2->pkt_type = PACKET_OUTGOING;
1106 ptype->func(skb2, skb->dev, ptype, skb->dev);
1113 void __netif_schedule(struct net_device *dev)
1115 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1116 unsigned long flags;
1117 struct softnet_data *sd;
1119 local_irq_save(flags);
1120 sd = &__get_cpu_var(softnet_data);
1121 dev->next_sched = sd->output_queue;
1122 sd->output_queue = dev;
1123 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1124 local_irq_restore(flags);
1127 EXPORT_SYMBOL(__netif_schedule);
1129 void __netif_rx_schedule(struct net_device *dev)
1131 unsigned long flags;
1133 local_irq_save(flags);
1135 list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1137 dev->quota += dev->weight;
1139 dev->quota = dev->weight;
1140 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1141 local_irq_restore(flags);
1143 EXPORT_SYMBOL(__netif_rx_schedule);
/* Free an skb from any context: defer to the irq-safe variant when
 * called with interrupts disabled or from hard irq context. */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1156 void netif_device_detach(struct net_device *dev)
1158 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1159 netif_running(dev)) {
1160 netif_stop_queue(dev);
1163 EXPORT_SYMBOL(netif_device_detach);
1165 void netif_device_attach(struct net_device *dev)
1167 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1168 netif_running(dev)) {
1169 netif_wake_queue(dev);
1170 __netdev_watchdog_up(dev);
1173 EXPORT_SYMBOL(netif_device_attach);
1177 * Invalidate hardware checksum when packet is to be mangled, and
1178 * complete checksum manually on outgoing path.
1180 int skb_checksum_help(struct sk_buff *skb, int inward)
1183 int ret = 0, offset = skb->h.raw - skb->data;
1186 goto out_set_summed;
1188 if (unlikely(skb_shinfo(skb)->gso_size)) {
1189 /* Let GSO fix up the checksum. */
1190 goto out_set_summed;
1193 if (skb_cloned(skb)) {
1194 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1199 BUG_ON(offset > (int)skb->len);
1200 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1202 offset = skb->tail - skb->h.raw;
1203 BUG_ON(offset <= 0);
1204 BUG_ON(skb->csum + 2 > offset);
1206 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1209 skb->ip_summed = CHECKSUM_NONE;
1215 * skb_gso_segment - Perform segmentation on skb.
1216 * @skb: buffer to segment
1217 * @features: features for the output path (see dev->features)
1219 * This function segments the given skb and returns a list of segments.
1221 * It may return NULL if the skb requires no segmentation. This is
1222 * only possible when GSO is used for verifying header integrity.
1224 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1226 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1227 struct packet_type *ptype;
1228 int type = skb->protocol;
1231 BUG_ON(skb_shinfo(skb)->frag_list);
1233 skb->mac.raw = skb->data;
1234 skb->mac_len = skb->nh.raw - skb->data;
1235 __skb_pull(skb, skb->mac_len);
1237 if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
1238 if (skb_header_cloned(skb) &&
1239 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1240 return ERR_PTR(err);
1244 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1245 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1246 if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
1247 err = ptype->gso_send_check(skb);
1248 segs = ERR_PTR(err);
1249 if (err || skb_gso_ok(skb, features))
1251 __skb_push(skb, skb->data - skb->nh.raw);
1253 segs = ptype->gso_segment(skb, features);
1259 __skb_push(skb, skb->data - skb->mac.raw);
1264 EXPORT_SYMBOL(skb_gso_segment);
1266 /* Take action when hardware reception checksum errors are detected. */
1268 void netdev_rx_csum_fault(struct net_device *dev)
1270 if (net_ratelimit()) {
1271 printk(KERN_ERR "%s: hw csum failure.\n",
1272 dev ? dev->name : "<unknown>");
1276 EXPORT_SYMBOL(netdev_rx_csum_fault);
1279 /* Actually, we should eliminate this check as soon as we know, that:
1280 * 1. IOMMU is present and allows to map all the memory.
1281 * 2. No high memory really exists on this machine.
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	/* Device cannot DMA from high memory: any highmem frag makes
	 * the skb illegal for scatter/gather on this device. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}
/* Per-skb state for software GSO: stashes the original destructor in
 * the skb control block while the segment list is in flight. */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1306 static void dev_gso_skb_destructor(struct sk_buff *skb)
1308 struct dev_gso_cb *cb;
1311 struct sk_buff *nskb = skb->next;
1313 skb->next = nskb->next;
1316 } while (skb->next);
1318 cb = DEV_GSO_CB(skb);
1320 cb->destructor(skb);
1324 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1325 * @skb: buffer to segment
1327 * This function segments the given skb and stores the list of segments
1330 static int dev_gso_segment(struct sk_buff *skb)
1332 struct net_device *dev = skb->dev;
1333 struct sk_buff *segs;
1334 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1337 segs = skb_gso_segment(skb, features);
1339 /* Verifying header integrity only. */
1343 if (unlikely(IS_ERR(segs)))
1344 return PTR_ERR(segs);
1347 DEV_GSO_CB(skb)->destructor = skb->destructor;
1348 skb->destructor = dev_gso_skb_destructor;
/* Hand one skb (or, after GSO emulation, a chain of segments linked via
 * skb->next) to the driver's hard_start_xmit.  Caller holds the TX lock
 * as appropriate.  Returns the driver's TX code; NETDEV_TX_BUSY if the
 * queue stops mid-chain with segments still pending. */
1353 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1355 if (likely(!skb->next)) {
/* deliver a copy to any taps (packet sockets) before transmit */
1357 dev_queue_xmit_nit(skb, dev);
1359 if (netif_needs_gso(dev, skb)) {
1360 if (unlikely(dev_gso_segment(skb)))
1366 return dev->hard_start_xmit(skb, dev);
/* GSO-emulation path: transmit each segment in turn */
1371 struct sk_buff *nskb = skb->next;
1374 skb->next = nskb->next;
1376 rc = dev->hard_start_xmit(nskb, dev);
/* driver rejected the segment: re-chain it for a later retry */
1378 nskb->next = skb->next;
1382 if (unlikely(netif_queue_stopped(dev) && skb->next))
1383 return NETDEV_TX_BUSY;
1384 } while (skb->next);
/* all segments sent: restore the destructor saved by dev_gso_segment */
1386 skb->destructor = DEV_GSO_CB(skb)->destructor;
/* Take/release the device TX lock only for drivers that do not do their
 * own TX locking (NETIF_F_LLTX set means "lockless TX", so skip it). */
1393 #define HARD_TX_LOCK(dev, cpu) { \
1394 if ((dev->features & NETIF_F_LLTX) == 0) { \
1395 netif_tx_lock(dev); \
1399 #define HARD_TX_UNLOCK(dev) { \
1400 if ((dev->features & NETIF_F_LLTX) == 0) { \
1401 netif_tx_unlock(dev); \
/* Xen checksum-offload fixup: a guest-originated packet may arrive with
 * proto_csum_blank set (checksum deferred).  Recompute h.raw/csum so the
 * normal CHECKSUM_HW machinery can finish it.  Returns 0 on success;
 * elided branches presumably return an error for malformed packets. */
1406 inline int skb_checksum_setup(struct sk_buff *skb)
1408 if (skb->proto_csum_blank) {
/* only IPv4 is handled by this fixup */
1409 if (skb->protocol != htons(ETH_P_IP))
/* transport header = IP header + 4*ihl bytes */
1411 skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
1412 if (skb->h.raw >= skb->tail)
1414 switch (skb->nh.iph->protocol) {
/* csum holds the offset of the checksum field within the L4 header */
1416 skb->csum = offsetof(struct tcphdr, check);
1419 skb->csum = offsetof(struct udphdr, check);
1422 if (net_ratelimit())
1423 printk(KERN_ERR "Attempting to checksum a non-"
1424 "TCP/UDP packet, dropping a protocol"
1425 " %d packet", skb->nh.iph->protocol);
/* checksum field (2 bytes) must lie within the buffer */
1428 if ((skb->h.raw + skb->csum + 2) > skb->tail)
1430 skb->ip_summed = CHECKSUM_HW;
1431 skb->proto_csum_blank = 0;
/* Non-Xen build: nothing to fix up. */
1438 inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
1443 * dev_queue_xmit - transmit a buffer
1444 * @skb: buffer to transmit
1446 * Queue a buffer for transmission to a network device. The caller must
1447 * have set the device and priority and built the buffer before calling
1448 * this function. The function can be called from an interrupt.
1450 * A negative errno code is returned on a failure. A success does not
1451 * guarantee the frame will be transmitted as it may be dropped due
1452 * to congestion or traffic shaping.
1454 * -----------------------------------------------------------------------------------
1455 * I notice this method can also return errors from the queue disciplines,
1456 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1459 * Regardless of the return value, the skb is consumed, so it is currently
1460 * difficult to retry a send to this method. (You can bump the ref count
1461 * before sending to hold a reference for retry if you are careful.)
1463 * When calling this method, interrupts MUST be enabled. This is because
1464 * the BH enable code must have IRQs enabled so that it will not deadlock.
1468 int dev_queue_xmit(struct sk_buff *skb)
1470 struct net_device *dev = skb->dev;
1474 /* If a checksum-deferred packet is forwarded to a device that needs a
1475 * checksum, correct the pointers and force checksumming.
1477 if (skb_checksum_setup(skb))
1480 /* GSO will handle the following emulations directly. */
1481 if (netif_needs_gso(dev, skb))
/* linearize if the device cannot take a frag_list */
1484 if (skb_shinfo(skb)->frag_list &&
1485 !(dev->features & NETIF_F_FRAGLIST) &&
1486 __skb_linearize(skb))
1489 /* Fragmented skb is linearized if device does not support SG,
1490 * or if at least one of fragments is in highmem and device
1491 * does not support DMA from it.
1493 if (skb_shinfo(skb)->nr_frags &&
1494 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1495 __skb_linearize(skb))
1498 /* If packet is not checksummed and device does not support
1499 * checksumming for this protocol, complete checksumming here.
1501 if (skb->ip_summed == CHECKSUM_HW &&
1502 (!(dev->features & NETIF_F_GEN_CSUM) &&
1503 (!(dev->features & NETIF_F_IP_CSUM) ||
1504 skb->protocol != htons(ETH_P_IP))))
1505 if (skb_checksum_help(skb, 0))
1509 spin_lock_prefetch(&dev->queue_lock);
1511 /* Disable soft irqs for various locks below. Also
1512 * stops preemption for RCU.
1516 /* Updates of qdisc are serialized by queue_lock.
1517 * The struct Qdisc which is pointed to by qdisc is now a
1518 * rcu structure - it may be accessed without acquiring
1519 * a lock (but the structure may be stale.) The freeing of the
1520 * qdisc will be deferred until it's known that there are no
1521 * more references to it.
1523 * If the qdisc has an enqueue function, we still need to
1524 * hold the queue_lock before calling it, since queue_lock
1525 * also serializes access to the device queue.
1528 q = rcu_dereference(dev->qdisc);
1529 #ifdef CONFIG_NET_CLS_ACT
1530 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1533 /* Grab device queue */
1534 spin_lock(&dev->queue_lock);
1537 rc = q->enqueue(skb, q);
1539 spin_unlock(&dev->queue_lock);
/* NET_XMIT_BYPASS is reported to the caller as plain success */
1541 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1544 spin_unlock(&dev->queue_lock);
1547 /* The device has no queue. Common case for software devices:
1548 loopback, all the sorts of tunnels...
1550 Really, it is unlikely that netif_tx_lock protection is necessary
1551 here. (f.e. loopback and IP tunnels are clean ignoring statistics
1553 However, it is possible, that they rely on protection
1556 Check this and shot the lock. It is not prone from deadlocks.
1557 Either shot noqueue qdisc, it is even simpler 8)
1559 if (dev->flags & IFF_UP) {
1560 int cpu = smp_processor_id(); /* ok because BHs are off */
/* xmit_lock_owner == cpu would mean we are recursing into ourselves */
1562 if (dev->xmit_lock_owner != cpu) {
1564 HARD_TX_LOCK(dev, cpu);
1566 if (!netif_queue_stopped(dev)) {
1568 if (!dev_hard_start_xmit(skb, dev)) {
1569 HARD_TX_UNLOCK(dev);
1573 HARD_TX_UNLOCK(dev);
1574 if (net_ratelimit())
1575 printk(KERN_CRIT "Virtual device %s asks to "
1576 "queue packet!\n", dev->name);
1578 /* Recursion is detected! It is possible,
1580 if (net_ratelimit())
1581 printk(KERN_CRIT "Dead loop on virtual device "
1582 "%s, fix it urgently!\n", dev->name);
1587 rcu_read_unlock_bh();
1593 rcu_read_unlock_bh();
1598 /*=======================================================================
1600 =======================================================================*/
/* Receive-path tunables (exported via sysctl elsewhere in the tree):
 * max per-CPU input queue length, per-softirq packet budget, and the
 * legacy backlog device weight. */
1602 int netdev_max_backlog = 1000;
1603 int netdev_budget = 300;
1604 int weight_p = 64; /* old backlog weight */
/* per-CPU RX statistics (total/dropped/time_squeeze counters) */
1606 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1610 * netif_rx - post buffer to the network code
1611 * @skb: buffer to post
1613 * This function receives a packet from a device driver and queues it for
1614 * the upper (protocol) levels to process. It always succeeds. The buffer
1615 * may be dropped during processing for congestion control or by the
1619 * NET_RX_SUCCESS (no congestion)
1620 * NET_RX_CN_LOW (low congestion)
1621 * NET_RX_CN_MOD (moderate congestion)
1622 * NET_RX_CN_HIGH (high congestion)
1623 * NET_RX_DROP (packet was dropped)
1627 int netif_rx(struct sk_buff *skb)
1629 struct softnet_data *queue;
1630 unsigned long flags;
1632 /* if netpoll wants it, pretend we never saw it */
1633 if (netpoll_rx(skb))
/* stamp arrival time only if the driver didn't already */
1636 if (!skb->tstamp.off_sec)
1640 * The code is rearranged so that the path is the most
1641 * short when CPU is congested, but is still operating.
/* irqs off: input_pkt_queue is also touched from hard-irq context */
1643 local_irq_save(flags);
1644 queue = &__get_cpu_var(softnet_data);
1646 __get_cpu_var(netdev_rx_stat).total++;
1647 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
/* non-empty queue: backlog softirq is already scheduled, just append */
1648 if (queue->input_pkt_queue.qlen) {
1651 __skb_queue_tail(&queue->input_pkt_queue, skb);
1652 local_irq_restore(flags);
1653 return NET_RX_SUCCESS;
/* queue was empty: arm the backlog NAPI device */
1656 netif_rx_schedule(&queue->backlog_dev);
/* backlog full: count the drop (the elided lines free the skb) */
1660 __get_cpu_var(netdev_rx_stat).dropped++;
1661 local_irq_restore(flags);
/* Process-context variant of netif_rx(): queues the packet and then
 * kicks pending softirqs so it is processed without waiting for the
 * next interrupt. */
1667 int netif_rx_ni(struct sk_buff *skb)
1672 err = netif_rx(skb);
1673 if (local_softirq_pending())
1680 EXPORT_SYMBOL(netif_rx_ni);
/* Bonding ingress helper: if the skb arrived on a bonding slave, either
 * drop it (per bonding policy) or redirect skb->dev to the master.
 * Returns the original device (return path elided in this extract). */
1682 static inline struct net_device *skb_bond(struct sk_buff *skb)
1684 struct net_device *dev = skb->dev;
1687 if (skb_bond_should_drop(skb)) {
1691 skb->dev = dev->master;
/* NET_TX softirq handler: (1) free skbs queued for deferred destruction
 * on this CPU's completion_queue, (2) run the qdiscs of devices queued
 * on output_queue.  Both lists are detached under irq-off so producers
 * in irq context stay safe. */
1697 static void net_tx_action(struct softirq_action *h)
1699 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1701 if (sd->completion_queue) {
1702 struct sk_buff *clist;
1704 local_irq_disable();
1705 clist = sd->completion_queue;
1706 sd->completion_queue = NULL;
1710 struct sk_buff *skb = clist;
1711 clist = clist->next;
/* deferred-free skbs must have dropped their last reference */
1713 BUG_TRAP(!atomic_read(&skb->users));
1718 if (sd->output_queue) {
1719 struct net_device *head;
1721 local_irq_disable();
1722 head = sd->output_queue;
1723 sd->output_queue = NULL;
1727 struct net_device *dev = head;
1728 head = head->next_sched;
/* clear SCHED before running so a concurrent netif_schedule re-queues */
1730 smp_mb__before_clear_bit();
1731 clear_bit(__LINK_STATE_SCHED, &dev->state);
/* trylock: if someone else holds queue_lock, reschedule instead of spin */
1733 if (spin_trylock(&dev->queue_lock)) {
1735 spin_unlock(&dev->queue_lock);
1737 netif_schedule(dev);
/* Hand skb to one packet_type handler.  Takes an extra reference because
 * the handler consumes one; caller keeps its own for further delivery. */
1743 static __inline__ int deliver_skb(struct sk_buff *skb,
1744 struct packet_type *pt_prev,
1745 struct net_device *orig_dev)
1747 atomic_inc(&skb->users);
1748 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1751 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hooks filled in by the bridge module when it loads. */
1752 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1754 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1755 unsigned char *addr);
1756 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
/* Divert skb into the bridge if its device is an enslaved bridge port.
 * Flushes any pending pt_prev delivery first.  Loopback packets are
 * never bridged. */
1758 static __inline__ int handle_bridge(struct sk_buff **pskb,
1759 struct packet_type **pt_prev, int *ret,
1760 struct net_device *orig_dev)
1762 struct net_bridge_port *port;
1764 if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1765 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1769 *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1773 return br_handle_frame_hook(port, pskb);
/* No bridge support compiled in: never consumes the skb. */
1776 #define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1779 #ifdef CONFIG_NET_CLS_ACT
1780 /* TODO: Maybe we should just force sch_ingress to be compiled in
1781 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1782 * a compare and 2 stores extra right now if we dont have it on
1783 * but have CONFIG_NET_CLS_ACT
1784 * NOTE: This doesnt stop any functionality; if you dont have
1785 * the ingress scheduler, you just cant add policies on ingress.
/* Run the device's ingress qdisc over skb and return its TC_ACT_*
 * verdict (TC_ACT_OK when no ingress qdisc is configured).  A redirect
 * TTL in tc_verd guards against packet-redirection loops. */
1788 static int ing_filter(struct sk_buff *skb)
1791 struct net_device *dev = skb->dev;
1792 int result = TC_ACT_OK;
1794 if (dev->qdisc_ingress) {
1795 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1796 if (MAX_RED_LOOP < ttl++) {
1797 printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
1798 skb->input_dev->name, skb->dev->name);
1802 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1804 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
/* re-read under the lock; qdisc_ingress may change concurrently */
1806 spin_lock(&dev->ingress_lock);
1807 if ((q = dev->qdisc_ingress) != NULL)
1808 result = q->enqueue(skb, q);
1809 spin_unlock(&dev->ingress_lock);
/* Core RX dispatch: stamp/normalize the skb, run ingress filtering,
 * bridging and diversion, deliver copies to ETH_P_ALL taps, then hand
 * the skb to the matching L3 protocol handler.  Returns a NET_RX_*
 * code (NET_RX_DROP when nothing claimed the packet). */
1817 int netif_receive_skb(struct sk_buff *skb)
1819 struct packet_type *ptype, *pt_prev;
1820 struct net_device *orig_dev;
1821 int ret = NET_RX_DROP;
1822 unsigned short type;
1824 /* if we've gotten here through NAPI, check netpoll */
1825 if (skb->dev->poll && netpoll_rx(skb))
1828 if (!skb->tstamp.off_sec)
1831 if (!skb->input_dev)
1832 skb->input_dev = skb->dev;
/* may retarget skb->dev to a bonding master; keep the original for taps */
1834 orig_dev = skb_bond(skb);
1839 __get_cpu_var(netdev_rx_stat).total++;
1841 skb->h.raw = skb->nh.raw = skb->data;
1842 skb->mac_len = skb->nh.raw - skb->mac.raw;
1848 #ifdef CONFIG_NET_CLS_ACT
/* TC_NCLS: classification already done earlier; skip ingress this pass */
1849 if (skb->tc_verd & TC_NCLS) {
1850 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
/* Xen: translate ip_summed into the proto_data_valid hint */
1856 switch (skb->ip_summed) {
1857 case CHECKSUM_UNNECESSARY:
1858 skb->proto_data_valid = 1;
1861 /* XXX Implement me. */
1863 skb->proto_data_valid = 0;
/* deliver to all-protocol taps (e.g. tcpdump) bound to any/this device */
1868 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1869 if (!ptype->dev || ptype->dev == skb->dev) {
1871 ret = deliver_skb(skb, pt_prev, orig_dev);
1876 #ifdef CONFIG_NET_CLS_ACT
1878 ret = deliver_skb(skb, pt_prev, orig_dev);
1879 pt_prev = NULL; /* noone else should process this after*/
1881 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1884 ret = ing_filter(skb);
/* ingress policy dropped or stole the packet: stop delivery */
1886 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1895 handle_diverter(skb);
1897 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
/* protocol-specific delivery, hashed by the low 4 bits of the type */
1900 type = skb->protocol;
1901 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1902 if (ptype->type == type &&
1903 (!ptype->dev || ptype->dev == skb->dev)) {
1905 ret = deliver_skb(skb, pt_prev, orig_dev);
/* last matching handler gets the skb itself, no extra reference */
1911 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1914 /* Jamal, now you will not able to escape explaining
1915 * me how you were going to use this. :-)
/* NAPI poll function of the per-CPU backlog pseudo-device: drain
 * input_pkt_queue through netif_receive_skb() until the quota/budget is
 * spent or one jiffy has elapsed.  When the queue empties, take the
 * device off the poll list and re-enable it. */
1925 static int process_backlog(struct net_device *backlog_dev, int *budget)
1928 int quota = min(backlog_dev->quota, *budget);
1929 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1930 unsigned long start_time = jiffies;
1932 backlog_dev->weight = weight_p;
1934 struct sk_buff *skb;
1935 struct net_device *dev;
/* the queue is refilled from irq context, so dequeue with irqs off */
1937 local_irq_disable();
1938 skb = __skb_dequeue(&queue->input_pkt_queue);
1945 netif_receive_skb(skb);
/* stop after the quota or roughly one tick of work */
1951 if (work >= quota || jiffies - start_time > 1)
1956 backlog_dev->quota -= work;
1961 backlog_dev->quota -= work;
1964 list_del(&backlog_dev->poll_list);
1965 smp_mb__before_clear_bit();
1966 netif_poll_enable(backlog_dev);
/* NET_RX softirq: round-robin the devices on this CPU's poll list,
 * invoking each driver's ->poll() until the global budget or a jiffy is
 * exhausted; devices with remaining work are rotated to the tail.  If we
 * run out of budget, reschedule ourselves (time_squeeze). */
1972 static void net_rx_action(struct softirq_action *h)
1974 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1975 unsigned long start_time = jiffies;
1976 int budget = netdev_budget;
1979 local_irq_disable();
1981 while (!list_empty(&queue->poll_list)) {
1982 struct net_device *dev;
1984 if (budget <= 0 || jiffies - start_time > 1)
1989 dev = list_entry(queue->poll_list.next,
1990 struct net_device, poll_list);
1991 have = netpoll_poll_lock(dev);
/* nonzero ->poll() means "more work": rotate the device to the tail */
1993 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1994 netpoll_poll_unlock(have);
1995 local_irq_disable();
1996 list_move_tail(&dev->poll_list, &queue->poll_list);
1998 dev->quota += dev->weight;
2000 dev->quota = dev->weight;
2002 netpoll_poll_unlock(have);
2004 local_irq_disable();
2008 #ifdef CONFIG_NET_DMA
2010 * There may not be any more sk_buffs coming right now, so push
2011 * any pending DMA copies to hardware
2013 if (net_dma_client) {
2014 struct dma_chan *chan;
2016 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
2017 dma_async_memcpy_issue_pending(chan);
/* out of budget with devices still pending: account and re-raise */
2025 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2026 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
/* Per-address-family SIOCGIFCONF handlers, indexed by protocol family. */
2030 static gifconf_func_t * gifconf_list [NPROTO];
2033 * register_gifconf - register a SIOCGIF handler
2034 * @family: Address family
2035 * @gifconf: Function handler
2037 * Register protocol dependent address dumping routines. The handler
2038 * that is passed must not be freed or reused until it has been replaced
2039 * by another handler.
2041 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2043 if (family >= NPROTO)
2045 gifconf_list[family] = gifconf;
2051 * Map an interface index to its name (SIOCGIFNAME)
2055 * We need this ioctl for efficient implementation of the
2056 * if_indextoname() function required by the IPv6 API. Without
2057 * it, we would have to search all the interfaces to find a
/* Copies the ifreq from userspace, looks up the device by ifr_ifindex
 * under dev_base_lock, writes its name back to userspace.  Returns
 * -EFAULT on copy failures (error paths partially elided). */
2061 static int dev_ifname(struct ifreq __user *arg)
2063 struct net_device *dev;
2067 * Fetch the caller's info block.
2070 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2073 read_lock(&dev_base_lock);
2074 dev = __dev_get_by_index(ifr.ifr_ifindex);
/* not found: drop the lock before returning (error return elided) */
2076 read_unlock(&dev_base_lock);
2080 strcpy(ifr.ifr_name, dev->name);
2081 read_unlock(&dev_base_lock);
2083 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2089 * Perform a SIOCGIFCONF call. This structure will change
2090 * size eventually, and there is nothing I can do about it.
2091 * Thus we will need a 'compatibility mode'.
/* Walks the global device list and lets each registered per-family
 * gifconf handler append entries to the user buffer.  VServer patch:
 * devices hidden from the current network context are skipped. */
2094 static int dev_ifconf(char __user *arg)
2097 struct net_device *dev;
2104 * Fetch the caller's info block.
2107 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2114 * Loop over the interfaces, and write an info block for each.
2118 for (dev = dev_base; dev; dev = dev->next) {
2119 if (vx_flags(VXF_HIDE_NETIF, 0) &&
2120 !dev_in_nx_info(dev, current->nx_info))
2122 for (i = 0; i < NPROTO; i++) {
2123 if (gifconf_list[i]) {
/* NULL buffer: size-query mode, handlers just count bytes */
2126 done = gifconf_list[i](dev, NULL, 0);
2128 done = gifconf_list[i](dev, pos + total,
2138 * All done. Write the updated control block back to the caller.
2140 ifc.ifc_len = total;
2143 * Both BSD and Solaris return 0 here, so we do too.
2145 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2148 #ifdef CONFIG_PROC_FS
2150 * This is invoked by the /proc filesystem handler to display a device
/* Returns the pos'th device on the dev_base list, or NULL if pos is
 * past the end.  Caller must hold dev_base_lock. */
2153 static __inline__ struct net_device *dev_get_idx(loff_t pos)
2155 struct net_device *dev;
2158 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2160 return i == pos ? dev : NULL;
/* seq_file iterator for /proc/net/dev.  start() takes dev_base_lock and
 * returns SEQ_START_TOKEN for the header row (pos 0); stop() drops the
 * lock.  pos is offset by one to make room for the header. */
2163 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2165 read_lock(&dev_base_lock);
2166 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2169 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
/* after the header token, start from dev_base; else follow ->next */
2172 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2175 void dev_seq_stop(struct seq_file *seq, void *v)
2177 read_unlock(&dev_base_lock);
/* Emit one /proc/net/dev line for dev.  VServer patch: devices not
 * visible to the current network context are silently skipped.  Falls
 * back to a "No statistics available" line when the driver provides no
 * get_stats hook. */
2180 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2182 struct nx_info *nxi = current->nx_info;
2184 if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
2186 if (dev->get_stats) {
2187 struct net_device_stats *stats = dev->get_stats(dev);
2189 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2190 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2191 dev->name, stats->rx_bytes, stats->rx_packets,
2193 stats->rx_dropped + stats->rx_missed_errors,
2194 stats->rx_fifo_errors,
2195 stats->rx_length_errors + stats->rx_over_errors +
2196 stats->rx_crc_errors + stats->rx_frame_errors,
2197 stats->rx_compressed, stats->multicast,
2198 stats->tx_bytes, stats->tx_packets,
2199 stats->tx_errors, stats->tx_dropped,
2200 stats->tx_fifo_errors, stats->collisions,
2201 stats->tx_carrier_errors +
2202 stats->tx_aborted_errors +
2203 stats->tx_window_errors +
2204 stats->tx_heartbeat_errors,
2205 stats->tx_compressed);
2207 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2211 * Called from the PROCfs module. This now uses the new arbitrary sized
2212 * /proc/net interface to create /proc/net/dev
/* show(): print the two-line column header for the start token,
 * otherwise one stats line for the device in v. */
2214 static int dev_seq_show(struct seq_file *seq, void *v)
2216 if (v == SEQ_START_TOKEN)
2217 seq_puts(seq, "Inter-| Receive "
2219 " face |bytes packets errs drop fifo frame "
2220 "compressed multicast|bytes packets errs "
2221 "drop fifo colls carrier compressed\n");
2223 dev_seq_printf_stats(seq, v);
/* /proc/net/softnet_stat iterator: *pos indexes CPUs; advance past
 * offline CPUs and return that CPU's netdev_rx_stat. */
2227 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2229 struct netif_rx_stats *rc = NULL;
2231 while (*pos < NR_CPUS)
2232 if (cpu_online(*pos)) {
2233 rc = &per_cpu(netdev_rx_stat, *pos);
2240 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2242 return softnet_get_online(pos);
2245 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2248 return softnet_get_online(pos);
/* nothing to release: per-CPU data needs no lock here */
2251 static void softnet_seq_stop(struct seq_file *seq, void *v)
/* one hex line per CPU; middle fields are zero placeholders for the
 * removed fastroute counters */
2255 static int softnet_seq_show(struct seq_file *seq, void *v)
2257 struct netif_rx_stats *s = v;
2259 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2260 s->total, s->dropped, s->time_squeeze, 0,
2261 0, 0, 0, 0, /* was fastroute */
/* Wiring of the two proc files to the seq_file machinery:
 * /proc/net/dev -> dev_seq_ops, /proc/net/softnet_stat -> softnet_seq_ops. */
2266 static struct seq_operations dev_seq_ops = {
2267 .start = dev_seq_start,
2268 .next = dev_seq_next,
2269 .stop = dev_seq_stop,
2270 .show = dev_seq_show,
2273 static int dev_seq_open(struct inode *inode, struct file *file)
2275 return seq_open(file, &dev_seq_ops);
2278 static struct file_operations dev_seq_fops = {
2279 .owner = THIS_MODULE,
2280 .open = dev_seq_open,
2282 .llseek = seq_lseek,
2283 .release = seq_release,
2286 static struct seq_operations softnet_seq_ops = {
2287 .start = softnet_seq_start,
2288 .next = softnet_seq_next,
2289 .stop = softnet_seq_stop,
2290 .show = softnet_seq_show,
2293 static int softnet_seq_open(struct inode *inode, struct file *file)
2295 return seq_open(file, &softnet_seq_ops);
2298 static struct file_operations softnet_seq_fops = {
2299 .owner = THIS_MODULE,
2300 .open = softnet_seq_open,
2302 .llseek = seq_lseek,
2303 .release = seq_release,
2306 #ifdef CONFIG_WIRELESS_EXT
2307 extern int wireless_proc_init(void);
/* no wireless extensions: make the init call a successful no-op */
2309 #define wireless_proc_init() 0
/* Create /proc/net/dev, /proc/net/softnet_stat and the wireless proc
 * entries; error paths (elided) unwind the entries created so far. */
2312 static int __init dev_proc_init(void)
2316 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2318 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2320 if (wireless_proc_init())
2326 proc_net_remove("softnet_stat");
2328 proc_net_remove("dev");
2332 #define dev_proc_init() 0
2333 #endif /* CONFIG_PROC_FS */
2337 * netdev_set_master - set up master/slave pair
2338 * @slave: slave device
2339 * @master: new master device
2341 * Changes the master device of the slave. Pass %NULL to break the
2342 * bonding. The caller must hold the RTNL semaphore. On a failure
2343 * a negative errno code is returned. On success the reference counts
2344 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2345 * function returns zero.
2347 int netdev_set_master(struct net_device *slave, struct net_device *master)
2349 struct net_device *old = slave->master;
2359 slave->master = master;
/* IFF_SLAVE tracks whether a master is currently set */
2367 slave->flags |= IFF_SLAVE;
2369 slave->flags &= ~IFF_SLAVE;
2371 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2376 * dev_set_promiscuity - update promiscuity count on a device
2380 * Add or remove promiscuity from a device. While the count in the device
2381 * remains above zero the interface remains promiscuous. Once it hits zero
2382 * the device reverts back to normal filtering operation. A negative inc
2383 * value is used to drop promiscuity on the device.
2385 void dev_set_promiscuity(struct net_device *dev, int inc)
2387 unsigned short old_flags = dev->flags;
/* refcounted flag: IFF_PROMISC reflects promiscuity == 0 / != 0 */
2389 if ((dev->promiscuity += inc) == 0)
2390 dev->flags &= ~IFF_PROMISC;
2392 dev->flags |= IFF_PROMISC;
/* only log/audit an actual transition, not a nested inc/dec */
2393 if (dev->flags != old_flags) {
2395 printk(KERN_INFO "device %s %s promiscuous mode\n",
2396 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
/* promiscuous mode is security-relevant: record it in the audit log */
2398 audit_log(current->audit_context, GFP_ATOMIC,
2399 AUDIT_ANOM_PROMISCUOUS,
2400 "dev=%s prom=%d old_prom=%d auid=%u",
2401 dev->name, (dev->flags & IFF_PROMISC),
2402 (old_flags & IFF_PROMISC),
2403 audit_get_loginuid(current->audit_context));
2408 * dev_set_allmulti - update allmulti count on a device
2412 * Add or remove reception of all multicast frames to a device. While the
2413 * count in the device remains above zero the interface remains listening
2414 * to all interfaces. Once it hits zero the device reverts back to normal
2415 * filtering operation. A negative @inc value is used to drop the counter
2416 * when releasing a resource needing all multicasts.
2419 void dev_set_allmulti(struct net_device *dev, int inc)
2421 unsigned short old_flags = dev->flags;
/* refcounted flag, mirroring dev_set_promiscuity's scheme */
2423 dev->flags |= IFF_ALLMULTI;
2424 if ((dev->allmulti += inc) == 0)
2425 dev->flags &= ~IFF_ALLMULTI;
/* push the change to the driver only on an actual transition */
2426 if (dev->flags ^ old_flags)
/* Compose the user-visible interface flags: device flags with the
 * refcounted bits replaced by their gflags shadow, plus the volatile
 * RUNNING/LOWER_UP/DORMANT state bits derived from operstate. */
2430 unsigned dev_get_flags(const struct net_device *dev)
2434 flags = (dev->flags & ~(IFF_PROMISC |
2439 (dev->gflags & (IFF_PROMISC |
2442 if (netif_running(dev)) {
2443 if (netif_oper_up(dev))
2444 flags |= IFF_RUNNING;
2445 if (netif_carrier_ok(dev))
2446 flags |= IFF_LOWER_UP;
2447 if (netif_dormant(dev))
2448 flags |= IFF_DORMANT;
/* SIOCSIFFLAGS backend: apply the writable subset of flags, bring the
 * interface up/down on IFF_UP transitions, translate IFF_PROMISC /
 * IFF_ALLMULTI requests into refcount updates via their gflags shadow,
 * and notify (netdev_chain + rtnetlink) on any resulting change. */
2454 int dev_change_flags(struct net_device *dev, unsigned flags)
2457 int old_flags = dev->flags;
2460 * Set the flags on our device.
/* only the listed bits are user-settable; the rest are preserved */
2463 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2464 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2466 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2470 * Load in the correct multicast list now the flags have changed.
2476 * Have we downed the interface. We handle IFF_UP ourselves
2477 * according to user attempts to set it, rather than blindly
2482 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2483 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2489 if (dev->flags & IFF_UP &&
2490 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2492 raw_notifier_call_chain(&netdev_chain,
2493 NETDEV_CHANGE, dev);
/* gflags shadows the user-requested bit; convert the toggle to +1/-1 */
2495 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2496 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2497 dev->gflags ^= IFF_PROMISC;
2498 dev_set_promiscuity(dev, inc);
2501 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2502 is important. Some (broken) drivers set IFF_PROMISC, when
2503 IFF_ALLMULTI is requested not asking us and not reporting.
2505 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2506 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2507 dev->gflags ^= IFF_ALLMULTI;
2508 dev_set_allmulti(dev, inc);
2511 if (old_flags ^ dev->flags)
2512 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
/* Change the device MTU, via the driver's change_mtu hook when one is
 * provided.  No-op when unchanged; notifies NETDEV_CHANGEMTU on success
 * for a running interface.  Validation error returns are elided here. */
2517 int dev_set_mtu(struct net_device *dev, int new_mtu)
2521 if (new_mtu == dev->mtu)
2524 /* MTU must be positive. */
2528 if (!netif_device_present(dev))
2532 if (dev->change_mtu)
2533 err = dev->change_mtu(dev, new_mtu);
2536 if (!err && dev->flags & IFF_UP)
2537 raw_notifier_call_chain(&netdev_chain,
2538 NETDEV_CHANGEMTU, dev);
/* Change the hardware address through the driver's set_mac_address hook.
 * Fails when the driver has no hook, the address family does not match
 * the device type, or the device is detached; notifies NETDEV_CHANGEADDR
 * on success. */
2542 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2546 if (!dev->set_mac_address)
2548 if (sa->sa_family != dev->type)
2550 if (!netif_device_present(dev))
2552 err = dev->set_mac_address(dev, sa);
2554 raw_notifier_call_chain(&netdev_chain,
2555 NETDEV_CHANGEADDR, dev);
2560 * Perform the SIOCxIFxxx calls.
/* Dispatch a per-interface ioctl on the device named in ifr->ifr_name.
 * Caller (dev_ioctl) holds the appropriate locks and has already done
 * permission checks.  Unknown commands in the private/bonding/MII/bridge
 * ranges are forwarded to the driver's do_ioctl hook. */
2562 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2565 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2571 case SIOCGIFFLAGS: /* Get interface flags */
2572 ifr->ifr_flags = dev_get_flags(dev);
2575 case SIOCSIFFLAGS: /* Set interface flags */
2576 return dev_change_flags(dev, ifr->ifr_flags);
2578 case SIOCGIFMETRIC: /* Get the metric on the interface
2579 (currently unused) */
2580 ifr->ifr_metric = 0;
2583 case SIOCSIFMETRIC: /* Set the metric on the interface
2584 (currently unused) */
2587 case SIOCGIFMTU: /* Get the MTU of a device */
2588 ifr->ifr_mtu = dev->mtu;
2591 case SIOCSIFMTU: /* Set the MTU of a device */
2592 return dev_set_mtu(dev, ifr->ifr_mtu);
/* SIOCGIFHWADDR (case label elided): copy out the hardware address */
2596 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2598 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2599 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2600 ifr->ifr_hwaddr.sa_family = dev->type;
2604 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2606 case SIOCSIFHWBROADCAST:
2607 if (ifr->ifr_hwaddr.sa_family != dev->type)
2609 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2610 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2611 raw_notifier_call_chain(&netdev_chain,
2612 NETDEV_CHANGEADDR, dev);
/* SIOCGIFMAP (case label elided): report the I/O resource map */
2616 ifr->ifr_map.mem_start = dev->mem_start;
2617 ifr->ifr_map.mem_end = dev->mem_end;
2618 ifr->ifr_map.base_addr = dev->base_addr;
2619 ifr->ifr_map.irq = dev->irq;
2620 ifr->ifr_map.dma = dev->dma;
2621 ifr->ifr_map.port = dev->if_port;
2625 if (dev->set_config) {
2626 if (!netif_device_present(dev))
2628 return dev->set_config(dev, &ifr->ifr_map);
/* SIOCADDMULTI (case label elided): hw address family must be AF_UNSPEC */
2633 if (!dev->set_multicast_list ||
2634 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2636 if (!netif_device_present(dev))
2638 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
/* SIOCDELMULTI (case label elided): mirror of the add path */
2642 if (!dev->set_multicast_list ||
2643 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2645 if (!netif_device_present(dev))
2647 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2651 ifr->ifr_ifindex = dev->ifindex;
2655 ifr->ifr_qlen = dev->tx_queue_len;
2659 if (ifr->ifr_qlen < 0)
2661 dev->tx_queue_len = ifr->ifr_qlen;
/* SIOCSIFNAME (case label elided): NUL-terminate before renaming */
2665 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2666 return dev_change_name(dev, ifr->ifr_newname);
2669 * Unknown or private ioctl
2673 if ((cmd >= SIOCDEVPRIVATE &&
2674 cmd <= SIOCDEVPRIVATE + 15) ||
2675 cmd == SIOCBONDENSLAVE ||
2676 cmd == SIOCBONDRELEASE ||
2677 cmd == SIOCBONDSETHWADDR ||
2678 cmd == SIOCBONDSLAVEINFOQUERY ||
2679 cmd == SIOCBONDINFOQUERY ||
2680 cmd == SIOCBONDCHANGEACTIVE ||
2681 cmd == SIOCGMIIPHY ||
2682 cmd == SIOCGMIIREG ||
2683 cmd == SIOCSMIIREG ||
2684 cmd == SIOCBRADDIF ||
2685 cmd == SIOCBRDELIF ||
2686 cmd == SIOCWANDEV) {
2688 if (dev->do_ioctl) {
2689 if (netif_device_present(dev))
2690 err = dev->do_ioctl(dev, ifr,
2703 * This function handles all "interface"-type I/O control requests. The actual
2704 * 'doing' part of this is dev_ifsioc above.
2708 * dev_ioctl - network device ioctl
2709 * @cmd: command to issue
2710 * @arg: pointer to a struct ifreq in user space
2712 * Issue ioctl functions to devices. This is normally called by the
2713 * user space syscall interfaces but can sometimes be useful for
2714 * other purposes. The return value is the return from the syscall if
2715 * positive or a negative errno code on error.
2718 int dev_ioctl(unsigned int cmd, void __user *arg)
2724 /* One special case: SIOCGIFCONF takes ifconf argument
2725 and requires shared lock, because it sleeps writing
2729 if (cmd == SIOCGIFCONF) {
2731 ret = dev_ifconf((char __user *) arg);
2735 if (cmd == SIOCGIFNAME)
2736 return dev_ifname((struct ifreq __user *)arg);
2738 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2741 ifr.ifr_name[IFNAMSIZ-1] = 0;
/* "eth0:1" alias notation: remember the colon for later restoration
 * (handling of the alias suffix is elided in this extract) */
2743 colon = strchr(ifr.ifr_name, ':');
2748 * See which interface the caller is talking about.
2753 * These ioctl calls:
2754 * - can be done by all.
2755 * - atomic and do not require locking.
/* read-only commands (case labels elided): shared dev_base_lock only */
2766 dev_load(ifr.ifr_name);
2767 read_lock(&dev_base_lock);
2768 ret = dev_ifsioc(&ifr, cmd);
2769 read_unlock(&dev_base_lock);
2773 if (copy_to_user(arg, &ifr,
2774 sizeof(struct ifreq)))
/* SIOCETHTOOL (case label elided): ethtool has its own dispatcher */
2780 dev_load(ifr.ifr_name);
2782 ret = dev_ethtool(&ifr);
2787 if (copy_to_user(arg, &ifr,
2788 sizeof(struct ifreq)))
2794 * These ioctl calls:
2795 * - require superuser power.
2796 * - require strict serialization.
/* privileged read-modify commands: CAP_NET_ADMIN + rtnl (elided) */
2802 if (!capable(CAP_NET_ADMIN))
2804 dev_load(ifr.ifr_name);
2806 ret = dev_ifsioc(&ifr, cmd);
2811 if (copy_to_user(arg, &ifr,
2812 sizeof(struct ifreq)))
2818 * These ioctl calls:
2819 * - require superuser power.
2820 * - require strict serialization.
2821 * - do not return a value
2831 case SIOCSIFHWBROADCAST:
2834 case SIOCBONDENSLAVE:
2835 case SIOCBONDRELEASE:
2836 case SIOCBONDSETHWADDR:
2837 case SIOCBONDCHANGEACTIVE:
2840 if (!capable(CAP_NET_ADMIN))
/* fall through: the two bond query ioctls need no CAP_NET_ADMIN */
2843 case SIOCBONDSLAVEINFOQUERY:
2844 case SIOCBONDINFOQUERY:
2845 dev_load(ifr.ifr_name);
2847 ret = dev_ifsioc(&ifr, cmd);
2852 /* Get the per device memory space. We can add this but
2853 * currently do not support it */
2855 /* Set the per device memory buffer space.
2856 * Not applicable in our case */
2861 * Unknown or private ioctl.
2864 if (cmd == SIOCWANDEV ||
2865 (cmd >= SIOCDEVPRIVATE &&
2866 cmd <= SIOCDEVPRIVATE + 15)) {
2867 dev_load(ifr.ifr_name);
2869 ret = dev_ifsioc(&ifr, cmd);
2871 if (!ret && copy_to_user(arg, &ifr,
2872 sizeof(struct ifreq)))
2876 #ifdef CONFIG_WIRELESS_EXT
2877 /* Take care of Wireless Extensions */
2878 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2879 /* If command is `set a parameter', or
2880 * `get the encoding parameters', check if
2881 * the user has the right to do it */
2882 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2883 || cmd == SIOCGIWENCODEEXT) {
2884 if (!capable(CAP_NET_ADMIN))
2887 dev_load(ifr.ifr_name);
2889 /* Follow me in net/core/wireless.c */
2890 ret = wireless_process_ioctl(&ifr, cmd);
2892 if (IW_IS_GET(cmd) &&
2893 copy_to_user(arg, &ifr,
2894 sizeof(struct ifreq)))
2898 #endif /* CONFIG_WIRELESS_EXT */
2905 * dev_new_index - allocate an ifindex
2907 * Returns a suitable unique value for a new device interface
2908 * number. The caller must hold the rtnl semaphore or the
2909 * dev_base_lock to be sure it remains unique.
2911 static int dev_new_index(void)
/* loop body (elided) increments a static counter until an unused index
 * is found */
2917 if (!__dev_get_by_index(ifindex))
/* Nonzero until net_dev_init() has run; registering earlier is a bug. */
2922 static int dev_boot_phase = 1;
2924 /* Delayed registration/unregisteration */
2925 static DEFINE_SPINLOCK(net_todo_list_lock);
2926 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
/* Queue dev for deferred (un)registration work processed later by the
 * netdev todo machinery (netdev_run_todo, outside this extract). */
2928 static inline void net_set_todo(struct net_device *dev)
2930 spin_lock(&net_todo_list_lock);
2931 list_add_tail(&dev->todo_list, &net_todo_list);
2932 spin_unlock(&net_todo_list_lock);
2936 * register_netdevice - register a network device
2937 * @dev: device to register
2939 * Take a completed network device structure and add it to the kernel
2940 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2941 * chain. 0 is returned on success. A negative errno code is returned
2942 * on a failure to set up the device, or if the name is a duplicate.
2944 * Callers must hold the rtnl semaphore. You may want
2945 * register_netdev() instead of this.
2948 * The locking appears insufficient to guarantee two parallel registers
2949 * will not get the same name.
/* NOTE(review): many original lines (error paths, 'goto'/'out' labels and
 * closing braces) are elided from this fragment; the comments below cover
 * only the statements that are visible. */
2952 int register_netdevice(struct net_device *dev)
2954 struct hlist_head *head;
2955 struct hlist_node *p;
/* Registering before net_dev_init() has run is a programming error. */
2958 BUG_ON(dev_boot_phase);
2963 /* When net_device's are persistent, this will be fatal. */
2964 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
/* Initialise the per-device locks before the device becomes visible. */
2966 spin_lock_init(&dev->queue_lock);
2967 spin_lock_init(&dev->_xmit_lock);
/* -1 means no CPU currently owns the xmit lock. */
2968 dev->xmit_lock_owner = -1;
2969 #ifdef CONFIG_NET_CLS_ACT
2970 spin_lock_init(&dev->ingress_lock);
2973 ret = alloc_divert_blk(dev);
2979 /* Init, if this function is available */
/* Driver-supplied init hook; presumably guarded by a NULL check on an
 * elided line -- confirm against the full source. */
2981 ret = dev->init(dev);
/* Reject names containing illegal characters. */
2989 if (!dev_valid_name(dev->name)) {
2994 dev->ifindex = dev_new_index();
/* iflink defaults to the device's own ifindex unless the driver set it. */
2995 if (dev->iflink == -1)
2996 dev->iflink = dev->ifindex;
2998 /* Check for existence of name */
2999 head = dev_name_hash(dev->name);
3000 hlist_for_each(p, head) {
3001 struct net_device *d
3002 = hlist_entry(p, struct net_device, name_hlist);
/* Duplicate name found -- the (elided) branch fails the registration. */
3003 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3009 /* Fix illegal SG+CSUM combinations. */
3010 if ((dev->features & NETIF_F_SG) &&
3011 !(dev->features & NETIF_F_ALL_CSUM)) {
3012 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3014 dev->features &= ~NETIF_F_SG;
3017 /* TSO requires that SG is present as well. */
3018 if ((dev->features & NETIF_F_TSO) &&
3019 !(dev->features & NETIF_F_SG)) {
3020 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3022 dev->features &= ~NETIF_F_TSO;
/* UFO requires both hardware checksumming and scatter/gather. */
3024 if (dev->features & NETIF_F_UFO) {
3025 if (!(dev->features & NETIF_F_HW_CSUM)) {
3026 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3027 "NETIF_F_HW_CSUM feature.\n",
3029 dev->features &= ~NETIF_F_UFO;
3031 if (!(dev->features & NETIF_F_SG)) {
3032 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3033 "NETIF_F_SG feature.\n",
3035 dev->features &= ~NETIF_F_UFO;
/* Install a default rebuild_header that acts purely as a bug trap -- it
 * should never actually be invoked. */
3040 * nil rebuild_header routine,
3041 * that should be never called and used as just bug trap.
3044 if (!dev->rebuild_header)
3045 dev->rebuild_header = default_rebuild_header;
3047 ret = netdev_register_sysfs(dev);
3050 dev->reg_state = NETREG_REGISTERED;
3053 * Default initial state at registry is that the
3054 * device is present.
3057 set_bit(__LINK_STATE_PRESENT, &dev->state);
3060 dev_init_scheduler(dev);
/* Link the device into the global list and both hash tables under
 * dev_base_lock so readers always see a consistent view. */
3061 write_lock_bh(&dev_base_lock);
3063 dev_tail = &dev->next;
3064 hlist_add_head(&dev->name_hlist, head);
3065 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3067 write_unlock_bh(&dev_base_lock);
3069 /* Notify protocols, that a new device appeared. */
3070 raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
/* Error-path cleanup (its label is on an elided line above). */
3077 free_divert_blk(dev);
3082 * register_netdev - register a network device
3083 * @dev: device to register
3085 * Take a completed network device structure and add it to the kernel
3086 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3087 * chain. 0 is returned on success. A negative errno code is returned
3088 * on a failure to set up the device, or if the name is a duplicate.
3090 * This is a wrapper around register_netdevice that takes the rtnl semaphore
3091 * and expands the device name if you passed a format string to
/* Convenience wrapper: takes the rtnl semaphore (on an elided line),
 * resolves a "%d"-style name template, then calls register_netdevice(). */
3094 int register_netdev(struct net_device *dev)
3101 * If the name is a format string the caller wants us to do a
/* e.g. "eth%d" -- allocate the first free matching name. */
3104 if (strchr(dev->name, '%')) {
3105 err = dev_alloc_name(dev, dev->name);
3111 * Back compatibility hook. Kill this one in 2.5
/* Empty or blank name: legacy drivers get an "eth%d" name assigned. */
3113 if (dev->name[0] == 0 || dev->name[0] == ' ') {
3114 err = dev_alloc_name(dev, "eth%d");
3119 err = register_netdevice(dev);
3124 EXPORT_SYMBOL(register_netdev);
3127 * netdev_wait_allrefs - wait until all references are gone.
3129 * This is called when unregistering network devices.
3131 * Any protocol or device that holds a reference should register
3132 * for netdevice notification, and cleanup and put back the
3133 * reference if they receive an UNREGISTER event.
3134 * We can get stuck here if buggy protocols don't correctly
/* Busy-wait (with periodic rebroadcasts) until every reference to @dev is
 * dropped.  Called from the unregister path; may take arbitrarily long if
 * a protocol leaks a reference. */
3137 static void netdev_wait_allrefs(struct net_device *dev)
3139 unsigned long rebroadcast_time, warning_time;
3141 rebroadcast_time = warning_time = jiffies;
3142 while (atomic_read(&dev->refcnt) != 0) {
/* Once a second, re-send NETDEV_UNREGISTER to nudge laggard holders. */
3143 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3146 /* Rebroadcast unregister notification */
3147 raw_notifier_call_chain(&netdev_chain,
3148 NETDEV_UNREGISTER, dev);
3150 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3152 /* We must not have linkwatch events
3153 * pending on unregister. If this
3154 * happens, we simply run the queue
3155 * unscheduled, resulting in a noop
3158 linkwatch_run_queue();
3163 rebroadcast_time = jiffies;
/* Every 10 seconds, log who we are still waiting for. */
3168 if (time_after(jiffies, warning_time + 10 * HZ)) {
3169 printk(KERN_EMERG "unregister_netdevice: "
3170 "waiting for %s to become free. Usage "
3172 dev->name, atomic_read(&dev->refcnt));
3173 warning_time = jiffies;
3182 * register_netdevice(x1);
3183 * register_netdevice(x2);
3185 * unregister_netdevice(y1);
3186 * unregister_netdevice(y2);
3192 * We are invoked by rtnl_unlock() after it drops the semaphore.
3193 * This allows us to deal with problems:
3194 * 1) We can delete sysfs objects which invoke hotplug
3195 * without deadlocking with linkwatch via keventd.
3196 * 2) Since we run with the RTNL semaphore not held, we can sleep
3197 * safely in order to wait for the netdev refcnt to drop to zero.
/* Serialises concurrent todo runs so unregister events complete in order. */
3199 static DEFINE_MUTEX(net_todo_run_mutex);
/* Drain net_todo_list: finish deferred unregistration for each queued
 * device.  Invoked by rtnl_unlock() after the RTNL semaphore is dropped,
 * so it may sleep (see the comment block above). */
3200 void netdev_run_todo(void)
3202 struct list_head list;
3204 /* Need to guard against multiple cpu's getting out of order. */
3205 mutex_lock(&net_todo_run_mutex);
3207 /* Not safe to do outside the semaphore. We must not return
3208 * until all unregister events invoked by the local processor
3209 * have been completed (either by this todo run, or one on
3212 if (list_empty(&net_todo_list))
3215 /* Snapshot list, allow later requests */
3216 spin_lock(&net_todo_list_lock);
3217 list_replace_init(&net_todo_list, &list);
3218 spin_unlock(&net_todo_list_lock);
3220 while (!list_empty(&list)) {
3221 struct net_device *dev
3222 = list_entry(list.next, struct net_device, todo_list);
3223 list_del(&dev->todo_list);
/* Only devices mid-unregister belong here; anything else is a bug. */
3225 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3226 printk(KERN_ERR "network todo '%s' but state %d\n",
3227 dev->name, dev->reg_state);
3232 netdev_unregister_sysfs(dev);
3233 dev->reg_state = NETREG_UNREGISTERED;
/* Block until every outstanding reference to the device is dropped. */
3235 netdev_wait_allrefs(dev);
3238 BUG_ON(atomic_read(&dev->refcnt));
/* Per-protocol private pointers must already be detached. */
3239 BUG_TRAP(!dev->ip_ptr);
3240 BUG_TRAP(!dev->ip6_ptr);
3241 BUG_TRAP(!dev->dn_ptr);
3243 /* It must be the very last action,
3244 * after this 'dev' may point to freed up memory.
3246 if (dev->destructor)
3247 dev->destructor(dev);
3251 mutex_unlock(&net_todo_run_mutex);
3255 * alloc_netdev - allocate network device
3256 * @sizeof_priv: size of private data to allocate space for
3257 * @name: device name format string
3258 * @setup: callback to initialize device
3260 * Allocates a struct net_device with private data area for driver use
3261 * and performs basic initialization.
/* Allocate a zeroed net_device plus @sizeof_priv bytes of driver-private
 * data in one block, aligned via NETDEV_ALIGN_CONST.  Returns NULL on
 * allocation failure.  NOTE(review): the NULL check after kzalloc and the
 * setup() invocation are on elided lines. */
3263 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3264 void (*setup)(struct net_device *))
3267 struct net_device *dev;
3270 /* ensure 32-byte alignment of both the device and private area */
3271 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
/* Extra NETDEV_ALIGN_CONST slack lets us round the base pointer up. */
3272 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3274 p = kzalloc(alloc_size, GFP_KERNEL);
3276 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
/* Round the raw allocation up to the alignment boundary... */
3280 dev = (struct net_device *)
3281 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
/* ...and remember the offset so free_netdev() can recover 'p'. */
3282 dev->padded = (char *)dev - (char *)p;
3285 dev->priv = netdev_priv(dev);
3288 strcpy(dev->name, name);
3291 EXPORT_SYMBOL(alloc_netdev);
3294 * free_netdev - free network device
3297 * This function does the last stage of destroying an allocated device
3298 * interface. The reference to the device object is released.
3299 * If this is the last reference then it will be freed.
/* Final stage of device destruction; releases the alloc_netdev() memory
 * (directly, or via the class-device refcount once registered). */
3301 void free_netdev(struct net_device *dev)
3304 /* Compatibility with error handling in drivers */
/* Never registered: no sysfs object exists, free the raw block now.
 * 'padded' undoes the alignment offset applied in alloc_netdev(). */
3305 if (dev->reg_state == NETREG_UNINITIALIZED) {
3306 kfree((char *)dev - dev->padded);
3310 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3311 dev->reg_state = NETREG_RELEASED;
3313 /* will free via class release */
3314 class_device_put(&dev->class_dev);
/* NOTE(review): this second kfree is presumably on an unreachable-after-
 * put path or under an elided conditional -- confirm with full source. */
3316 kfree((char *)dev - dev->padded);
3320 /* Synchronize with packet receive processing. */
/* NOTE(review): the body is elided in this fragment -- presumably waits
 * for an RCU grace period so in-flight receive paths finish; confirm. */
3321 void synchronize_net(void)
3328 * unregister_netdevice - remove device from the kernel
3331 * This function shuts down a device interface and removes it
3332 * from the kernel tables. On success 0 is returned, on a failure
3333 * a negative errno code is returned.
3335 * Callers must hold the rtnl semaphore. You may want
3336 * unregister_netdev() instead of this.
/* NOTE(review): several lines (returns, dev_close()/qdisc calls, the
 * final net_set_todo()) are elided; comments cover visible code only. */
3339 int unregister_netdevice(struct net_device *dev)
3341 struct net_device *d, **dp;
3343 BUG_ON(dev_boot_phase);
3346 /* Some devices call without registering for initialization unwind. */
3347 if (dev->reg_state == NETREG_UNINITIALIZED) {
3348 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3349 "was registered\n", dev->name, dev);
3353 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3355 /* If device is running, close it first. */
3356 if (dev->flags & IFF_UP)
3359 /* And unlink it from device chain. */
/* Walk the singly linked dev_base chain looking for our entry. */
3360 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3362 write_lock_bh(&dev_base_lock);
3363 hlist_del(&dev->name_hlist);
3364 hlist_del(&dev->index_hlist);
/* If we were the tail, the tail pointer must be rewound. */
3365 if (dev_tail == &dev->next)
3368 write_unlock_bh(&dev_base_lock);
/* Fell off the end of the chain without finding the device. */
3373 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3378 dev->reg_state = NETREG_UNREGISTERING;
3382 /* Shutdown queueing discipline. */
3386 /* Notify protocols, that we are about to destroy
3387 this device. They should clean all the things.
3389 raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3392 * Flush the multicast chain
3394 dev_mc_discard(dev);
3399 /* Notifier chain MUST detach us from master device. */
3400 BUG_TRAP(!dev->master);
3402 free_divert_blk(dev);
3404 /* Finish processing unregister after unlock */
3414 * unregister_netdev - remove device from the kernel
3417 * This function shuts down a device interface and removes it
3418 * from the kernel tables. On success 0 is returned, on a failure
3419 * a negative errno code is returned.
3421 * This is just a wrapper for unregister_netdevice that takes
3422 * the rtnl semaphore. In general you want to use this and not
3423 * unregister_netdevice.
/* Wrapper around unregister_netdevice() that takes the rtnl semaphore
 * (the rtnl_lock/rtnl_unlock calls are on elided lines). */
3425 void unregister_netdev(struct net_device *dev)
3428 unregister_netdevice(dev);
3432 EXPORT_SYMBOL(unregister_netdev);
3434 #ifdef CONFIG_HOTPLUG_CPU
/* CPU-hotplug notifier: when a CPU dies, splice its softnet queues
 * (completion, output, input) onto the current CPU's so no packets or
 * pending frees are stranded. */
3435 static int dev_cpu_callback(struct notifier_block *nfb,
3436 unsigned long action,
3439 struct sk_buff **list_skb;
3440 struct net_device **list_net;
3441 struct sk_buff *skb;
3442 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3443 struct softnet_data *sd, *oldsd;
/* Only interested in the CPU_DEAD transition. */
3445 if (action != CPU_DEAD)
/* IRQs stay off while we splice the per-cpu queues. */
3448 local_irq_disable();
3449 cpu = smp_processor_id();
3450 sd = &per_cpu(softnet_data, cpu);
3451 oldsd = &per_cpu(softnet_data, oldcpu);
3453 /* Find end of our completion_queue. */
3454 list_skb = &sd->completion_queue;
3456 list_skb = &(*list_skb)->next;
3457 /* Append completion queue from offline CPU. */
3458 *list_skb = oldsd->completion_queue;
3459 oldsd->completion_queue = NULL;
3461 /* Find end of our output_queue. */
3462 list_net = &sd->output_queue;
3464 list_net = &(*list_net)->next_sched;
3465 /* Append output queue from offline CPU. */
3466 *list_net = oldsd->output_queue;
3467 oldsd->output_queue = NULL;
/* Kick TX softirq to process the newly adopted work. */
3469 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3472 /* Process offline CPU's input_pkt_queue */
3473 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3478 #endif /* CONFIG_HOTPLUG_CPU */
3480 #ifdef CONFIG_NET_DMA
3482 * net_dma_rebalance - rebalance DMA channel assignments across online CPUs
3483 * This is called when the number of channels allocated to the net_dma_client
3484 * changes. The net_dma_client tries to have one DMA channel per CPU.
/* Redistribute the available DMA channels across online CPUs, aiming for
 * one channel per CPU (channels may serve several CPUs if scarce). */
3486 static void net_dma_rebalance(void)
3488 unsigned int cpu, i, n;
3489 struct dma_chan *chan;
/* No channels at all: clear every CPU's assignment. */
3491 if (net_dma_count == 0) {
3492 for_each_online_cpu(cpu)
3493 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3498 cpu = first_cpu(cpu_online_map);
3501 list_for_each_entry(chan, &net_dma_client->channels, client_node) {
/* Give each channel ceil-or-floor of num_online_cpus()/net_dma_count
 * CPUs, distributing the remainder over the first channels. */
3502 n = ((num_online_cpus() / net_dma_count)
3503 + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3506 per_cpu(softnet_data, cpu).net_dma = chan;
3507 cpu = next_cpu(cpu, cpu_online_map);
3516 * netdev_dma_event - event callback for the net_dma_client
3517 * @client: should always be net_dma_client
3518 * @chan: DMA channel for the event
3519 * @event: event type
/* Reacts to channel add/remove events by rebalancing the per-cpu channel
 * assignments; serialised by net_dma_event_lock.  NOTE(review): the
 * switch statement and net_dma_count updates sit on elided lines. */
3521 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3522 enum dma_event event)
3524 spin_lock(&net_dma_event_lock);
3526 case DMA_RESOURCE_ADDED:
3528 net_dma_rebalance();
3530 case DMA_RESOURCE_REMOVED:
3532 net_dma_rebalance();
3537 spin_unlock(&net_dma_event_lock);
3541 * netdev_dma_register - register the networking subsystem as a DMA client
/* Boot-time hook: register networking as a DMA-engine client and request
 * one channel per online CPU.  Returns an errno on registration failure
 * (return statements are on elided lines). */
3543 static int __init netdev_dma_register(void)
3545 spin_lock_init(&net_dma_event_lock);
3546 net_dma_client = dma_async_client_register(netdev_dma_event);
3547 if (net_dma_client == NULL)
3550 dma_async_client_chan_request(net_dma_client, num_online_cpus());
/* Stub used when CONFIG_NET_DMA is not configured. */
3555 static int __init netdev_dma_register(void) { return -ENODEV; }
3556 #endif /* CONFIG_NET_DMA */
3559 * Initialize the DEV module. At boot time this walks the device list and
3560 * unhooks any devices that fail to initialise (normally hardware not
3561 * present) and leaves us with a valid list of present and active devices.
3566 * This is called single threaded during boot, so no need
3567 * to take the rtnl semaphore.
/* Subsystem initcall: sets up procfs/sysfs, the packet-type lists, the
 * name/index hash tables, per-cpu softnet queues, NET_DMA, the TX/RX
 * softirqs and the CPU-hotplug notifier. */
3569 static int __init net_dev_init(void)
3571 int i, rc = -ENOMEM;
3573 BUG_ON(!dev_boot_phase);
3577 if (dev_proc_init())
3580 if (netdev_sysfs_init())
3583 INIT_LIST_HEAD(&ptype_all);
/* ptype_base is a 16-bucket hash of packet-type handlers. */
3584 for (i = 0; i < 16; i++)
3585 INIT_LIST_HEAD(&ptype_base[i]);
3587 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3588 INIT_HLIST_HEAD(&dev_name_head[i]);
3590 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3591 INIT_HLIST_HEAD(&dev_index_head[i]);
3594 * Initialise the packet receive queues.
3597 for_each_possible_cpu(i) {
3598 struct softnet_data *queue;
3600 queue = &per_cpu(softnet_data, i);
3601 skb_queue_head_init(&queue->input_pkt_queue);
3602 queue->completion_queue = NULL;
3603 INIT_LIST_HEAD(&queue->poll_list);
/* The per-cpu backlog pseudo-device feeds process_backlog() via NAPI. */
3604 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3605 queue->backlog_dev.weight = weight_p;
3606 queue->backlog_dev.poll = process_backlog;
3607 atomic_set(&queue->backlog_dev.refcnt, 1);
3610 netdev_dma_register();
3614 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3615 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3617 hotcpu_notifier(dev_cpu_callback, 0);
3625 subsys_initcall(net_dev_init);
/* Symbols exported to modules; keep in sync with the public declarations
 * in <linux/netdevice.h>. */
3627 EXPORT_SYMBOL(__dev_get_by_index);
3628 EXPORT_SYMBOL(__dev_get_by_name);
3629 EXPORT_SYMBOL(__dev_remove_pack);
3630 EXPORT_SYMBOL(dev_valid_name);
3631 EXPORT_SYMBOL(dev_add_pack);
3632 EXPORT_SYMBOL(dev_alloc_name);
3633 EXPORT_SYMBOL(dev_close);
3634 EXPORT_SYMBOL(dev_get_by_flags);
3635 EXPORT_SYMBOL(dev_get_by_index);
3636 EXPORT_SYMBOL(dev_get_by_name);
3637 EXPORT_SYMBOL(dev_open);
3638 EXPORT_SYMBOL(dev_queue_xmit);
3639 #if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
3640 EXPORT_SYMBOL(dev_queue_xmit_nit);
3642 EXPORT_SYMBOL(dev_remove_pack);
3643 EXPORT_SYMBOL(dev_set_allmulti);
3644 EXPORT_SYMBOL(dev_set_promiscuity);
3645 EXPORT_SYMBOL(dev_change_flags);
3646 EXPORT_SYMBOL(dev_set_mtu);
3647 EXPORT_SYMBOL(dev_set_mac_address);
3648 EXPORT_SYMBOL(free_netdev);
3649 EXPORT_SYMBOL(netdev_boot_setup_check);
3650 EXPORT_SYMBOL(netdev_set_master);
3651 EXPORT_SYMBOL(netdev_state_change);
3652 EXPORT_SYMBOL(netif_receive_skb);
3653 EXPORT_SYMBOL(netif_rx);
3654 EXPORT_SYMBOL(register_gifconf);
3655 EXPORT_SYMBOL(register_netdevice);
3656 EXPORT_SYMBOL(register_netdevice_notifier);
3657 EXPORT_SYMBOL(skb_checksum_help);
3658 EXPORT_SYMBOL(synchronize_net);
3659 EXPORT_SYMBOL(unregister_netdevice);
3660 EXPORT_SYMBOL(unregister_netdevice_notifier);
3661 EXPORT_SYMBOL(net_enable_timestamp);
3662 EXPORT_SYMBOL(net_disable_timestamp);
3663 EXPORT_SYMBOL(dev_get_flags);
3664 EXPORT_SYMBOL(skb_checksum_setup);
/* Bridge hook points, only present when bridging is configured. */
3666 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3667 EXPORT_SYMBOL(br_handle_frame_hook);
3668 EXPORT_SYMBOL(br_fdb_get_hook);
3669 EXPORT_SYMBOL(br_fdb_put_hook);
3673 EXPORT_SYMBOL(dev_load);
3676 EXPORT_PER_CPU_SYMBOL(softnet_data);