net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/config.h>
  80 #include <linux/cpu.h>
  81 #include <linux/types.h>
  82 #include <linux/kernel.h>
  83 #include <linux/sched.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/sock.h>
  96 #include <linux/rtnetlink.h>
  97 #include <linux/proc_fs.h>
  98 #include <linux/seq_file.h>
  99 #include <linux/stat.h>
 100 #include <linux/if_bridge.h>
 101 #include <linux/divert.h>
 102 #include <net/dst.h>
 103 #include <net/pkt_sched.h>
 104 #include <net/checksum.h>
 105 #include <linux/highmem.h>
 106 #include <linux/init.h>
 107 #include <linux/kmod.h>
 108 #include <linux/module.h>
 109 #include <linux/kallsyms.h>
 110 #include <linux/netpoll.h>
 111 #include <linux/rcupdate.h>
 112 #include <linux/delay.h>
 113 #ifdef CONFIG_NET_RADIO
 114 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
 115 #include <net/iw_handler.h>
 116 #endif  /* CONFIG_NET_RADIO */
 117 #include <linux/vs_network.h>
 118 #include <asm/current.h>
 119
 120 /*
 121  *      The list of packet types we will receive (as opposed to discard)
 122  *      and the routines to invoke.
 123  *
 124  *      Why 16. Because with 16 the only overlap we get on a hash of the
 125  *      low nibble of the protocol value is RARP/SNAP/X.25.
 126  *
 127  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 128  *             sure which should go first, but I bet it won't make much
 129  *             difference if we are running VLANs.  The good news is that
 130  *             this protocol won't be in the list unless compiled in, so
 131  *             the average user (w/out VLANs) will not be adversely affected.
 132  *             --BLG
 133  *
 134  *              0800    IP
 135  *              8100    802.1Q VLAN
 136  *              0001    802.3
 137  *              0002    AX.25
 138  *              0004    802.2
 139  *              8035    RARP
 140  *              0005    SNAP
 141  *              0805    X.25
 142  *              0806    ARP
 143  *              8137    IPX
 144  *              0009    Localtalk
 145  *              86DD    IPv6
 146  */
 147
 148 static DEFINE_SPINLOCK(ptype_lock);     /* taken with spin_lock_bh() by dev_add_pack()/__dev_remove_pack() while editing the lists below */
 149 static struct list_head ptype_base[16]; /* 16 way hashed list, bucket = ntohs(type) & 15 */
 150 static struct list_head ptype_all;              /* Taps: handlers registered for ETH_P_ALL */
 151
 152 /*
 153  * The @dev_base list is protected by @dev_base_lock and the rtnl
 154  * semaphore.
 155  *
 156  * Pure readers hold dev_base_lock for reading.
 157  *
 158  * Writers must hold the rtnl semaphore while they loop through the
 159  * dev_base list, and hold dev_base_lock for writing when they do the
 160  * actual updates.  This allows pure readers to access the list even
 161  * while a writer is preparing to update it.
 162  *
 163  * To put it another way, dev_base_lock is held for writing only to
 164  * protect against pure readers; the rtnl semaphore provides the
 165  * protection against other writers.
 166  *
 167  * See, for example usages, register_netdevice() and
 168  * unregister_netdevice(), which must be called with the rtnl
 169  * semaphore held.
 170  */
 171 struct net_device *dev_base;            /* head of the device list, chained through dev->next */
 172 static struct net_device **dev_tail = &dev_base;        /* starts at &dev_base; presumably tracks the append slot -- confirm at the registration code */
 173 DEFINE_RWLOCK(dev_base_lock);           /* read-held by the dev_get_by_*() lookups in this file */
 174
 175 EXPORT_SYMBOL(dev_base);
 176 EXPORT_SYMBOL(dev_base_lock);
 177
 178 #define NETDEV_HASHBITS 8       /* log2 of the bucket count: 1<<8 = 256 buckets per table */
 179 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];     /* devices by name, indexed via dev_name_hash() */
 180 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];    /* devices by ifindex, indexed via dev_index_hash() */
 181
 182 static inline struct hlist_head *dev_name_hash(const char *name)
 183 {
 184         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 185         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 186 }
 187
 188 static inline struct hlist_head *dev_index_hash(int ifindex)
 189 {
 190         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 191 }
 192
 193 /*
 194  *      Our notifier list: the chain handed to notifier_call_chain() for NETDEV_* events
 195  */
 196
 197 static struct notifier_block *netdev_chain;
 198
 199 /*
 200  *      Device drivers call our routines to queue packets here. We empty the
 201  *      queue in the local softnet handler. One instance is defined per CPU.
 202  */
 203 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
 204
 205 #ifdef CONFIG_SYSFS     /* sysfs hooks are only declared here (extern) */
 206 extern int netdev_sysfs_init(void);
 207 extern int netdev_register_sysfs(struct net_device *);
 208 extern void netdev_unregister_sysfs(struct net_device *);
 209 #else   /* !CONFIG_SYSFS: the hooks collapse to no-ops; the int-valued ones evaluate to 0 */
 210 #define netdev_sysfs_init()             (0)
 211 #define netdev_register_sysfs(dev)      (0)
 212 #define netdev_unregister_sysfs(dev)    do { } while(0)
 213 #endif  /* CONFIG_SYSFS */
 214
 215
 216 /*******************************************************************************
 217
 218                 Protocol management and registration routines
 219
 220 *******************************************************************************/
 221
 222 /*
 223  *      For efficiency: count of ETH_P_ALL taps, adjusted by dev_add_pack()/__dev_remove_pack()
 224  */
 225
 226 int netdev_nit;
 227
 228 /*
 229  *      Add a protocol ID to the list. Now that the input handler is
 230  *      smarter we can dispense with all the messy stuff that used to be
 231  *      here.
 232  *
 233  *      BEWARE!!! Protocol handlers, mangling input packets,
 234  *      MUST BE last in hash buckets and checking protocol handlers
 235  *      MUST start from promiscuous ptype_all chain in net_bh.
 236  *      It is true now, do not change it.
 237  *      Explanation follows: if protocol handler, mangling packet, will
 238  *      be the first on list, it is not able to sense, that packet
 239  *      is cloned and should be copied-on-write, so that it will
 240  *      change it and subsequent readers will get broken packet.
 241  *                                                      --ANK (980803)
 242  */
 243
 244 /**
 245  *      dev_add_pack - add packet handler
 246  *      @pt: packet type declaration
 247  *
 248  *      Add a protocol handler to the networking stack. The passed &packet_type
 249  *      is linked into kernel lists and may not be freed until it has been
 250  *      removed from the kernel lists.
 251  *
 252  *      This call does not sleep therefore it can not
 253  *      guarantee all CPU's that are in middle of receiving packets
 254  *      will see the new packet type (until the next received packet).
 255  */
 256
 257 void dev_add_pack(struct packet_type *pt)
 258 {
 259         int hash;
 260
 261         spin_lock_bh(&ptype_lock);
 262         if (pt->type == htons(ETH_P_ALL)) {
 263                 netdev_nit++;
 264                 list_add_rcu(&pt->list, &ptype_all);
 265         } else {
 266                 hash = ntohs(pt->type) & 15;
 267                 list_add_rcu(&pt->list, &ptype_base[hash]);
 268         }
 269         spin_unlock_bh(&ptype_lock);
 270 }
 271
 272 /**
 273  *      __dev_remove_pack        - remove packet handler
 274  *      @pt: packet type declaration
 275  *
 276  *      Remove a protocol handler that was previously added to the kernel
 277  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 278  *      from the kernel lists and can be freed or reused once this function
 279  *      returns.
 280  *
 281  *      The packet type might still be in use by receivers
 282  *      and must not be freed until after all the CPU's have gone
 283  *      through a quiescent state.
 284  */
 285 void __dev_remove_pack(struct packet_type *pt)
 286 {
 287         struct list_head *head;
 288         struct packet_type *pt1;
 289
 290         spin_lock_bh(&ptype_lock);
 291
 292         if (pt->type == htons(ETH_P_ALL)) {
 293                 netdev_nit--;
 294                 head = &ptype_all;
 295         } else
 296                 head = &ptype_base[ntohs(pt->type) & 15];
 297
 298         list_for_each_entry(pt1, head, list) {
 299                 if (pt == pt1) {
 300                         list_del_rcu(&pt->list);
 301                         goto out;
 302                 }
 303         }
 304
 305         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 306 out:
 307         spin_unlock_bh(&ptype_lock);
 308 }
 309 /**
 310  *      dev_remove_pack  - remove packet handler
 311  *      @pt: packet type declaration
 312  *
 313  *      Remove a protocol handler that was previously added to the kernel
 314  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 315  *      from the kernel lists and can be freed or reused once this function
 316  *      returns.
 317  *
 318  *      This call sleeps to guarantee that no CPU is looking at the packet
 319  *      type after return.
 320  */
 321 void dev_remove_pack(struct packet_type *pt)
 322 {
 323         __dev_remove_pack(pt);
 324
 325         synchronize_net();
 326 }
 327
 328 /******************************************************************************
 329
 330                       Device Boot-time Settings Routines
 331
 332 *******************************************************************************/
 333
 334 /* Boot time configuration table: filled by netdev_boot_setup_add(); a slot whose name starts with '\0' or ' ' is free */
 335 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 336
 337 /**
 338  *      netdev_boot_setup_add   - add new setup entry
 339  *      @name: name of the device
 340  *      @map: configured settings for the device
 341  *
 342  *      Adds new setup entry to the dev_boot_setup list.  The function
 343  *      returns 0 on error and 1 on success.  This is a generic routine to
 344  *      all netdevices.
 345  */
 346 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 347 {
 348         struct netdev_boot_setup *s;
 349         int i;
 350
 351         s = dev_boot_setup;
 352         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 353                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 354                         memset(s[i].name, 0, sizeof(s[i].name));
 355                         strcpy(s[i].name, name);
 356                         memcpy(&s[i].map, map, sizeof(s[i].map));
 357                         break;
 358                 }
 359         }
 360
 361         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 362 }
 363
 364 /**
 365  *      netdev_boot_setup_check - check boot time settings
 366  *      @dev: the netdevice
 367  *
 368  *      Check boot time settings for the device.
 369  *      The found settings are set for the device to be used
 370  *      later in the device probing.
 371  *      Returns 0 if no settings found, 1 if they are.
 372  */
 373 int netdev_boot_setup_check(struct net_device *dev)
 374 {
 375         struct netdev_boot_setup *s = dev_boot_setup;
 376         int i;
 377
 378         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 379                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 380                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 381                         dev->irq        = s[i].map.irq;
 382                         dev->base_addr  = s[i].map.base_addr;
 383                         dev->mem_start  = s[i].map.mem_start;
 384                         dev->mem_end    = s[i].map.mem_end;
 385                         return 1;
 386                 }
 387         }
 388         return 0;
 389 }
 390
 391
 392 /**
 393  *      netdev_boot_base        - get address from boot time settings
 394  *      @prefix: prefix for network device
 395  *      @unit: id for network device
 396  *
 397  *      Check boot time settings for the base address of device.
 398  *      The found settings are set for the device to be used
 399  *      later in the device probing.
 400  *      Returns 0 if no settings found.
 401  */
 402 unsigned long netdev_boot_base(const char *prefix, int unit)
 403 {
 404         const struct netdev_boot_setup *s = dev_boot_setup;
 405         char name[IFNAMSIZ];
 406         int i;
 407
 408         sprintf(name, "%s%d", prefix, unit);
 409
 410         /*
 411          * If device already registered then return base of 1
 412          * to indicate not to probe for this interface
 413          */
 414         if (__dev_get_by_name(name))
 415                 return 1;
 416
 417         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 418                 if (!strcmp(name, s[i].name))
 419                         return s[i].map.base_addr;
 420         return 0;
 421 }
 422
 423 /*
 424  * Saves at boot time configured settings for any netdevice.
 425  */
 426 int __init netdev_boot_setup(char *str)
 427 {
 428         int ints[5];
 429         struct ifmap map;
 430
 431         str = get_options(str, ARRAY_SIZE(ints), ints);
 432         if (!str || !*str)
 433                 return 0;
 434
 435         /* Save settings */
 436         memset(&map, 0, sizeof(map));
 437         if (ints[0] > 0)
 438                 map.irq = ints[1];
 439         if (ints[0] > 1)
 440                 map.base_addr = ints[2];
 441         if (ints[0] > 2)
 442                 map.mem_start = ints[3];
 443         if (ints[0] > 3)
 444                 map.mem_end = ints[4];
 445
 446         /* Add new entry to the list */
 447         return netdev_boot_setup_add(str, &map);
 448 }
 449
 450 __setup("netdev=", netdev_boot_setup);
 451
 452 /*******************************************************************************
 453
 454                             Device Interface Subroutines
 455
 456 *******************************************************************************/
 457
 458 /**
 459  *      __dev_get_by_name       - find a device by its name
 460  *      @name: name to find
 461  *
 462  *      Find an interface by name. Must be called under RTNL semaphore
 463  *      or @dev_base_lock. If the name is found a pointer to the device
 464  *      is returned. If the name is not found then %NULL is returned. The
 465  *      reference counters are not incremented so the caller must be
 466  *      careful with locks.
 467  */
 468
 469 struct net_device *__dev_get_by_name(const char *name)
 470 {
 471         struct hlist_node *p;
 472
 473         hlist_for_each(p, dev_name_hash(name)) {
 474                 struct net_device *dev
 475                         = hlist_entry(p, struct net_device, name_hlist);
 476                 if (!strncmp(dev->name, name, IFNAMSIZ))
 477                         return dev;
 478         }
 479         return NULL;
 480 }
 481
 482 /**
 483  *      dev_get_by_name         - find a device by its name
 484  *      @name: name to find
 485  *
 486  *      Find an interface by name. This can be called from any
 487  *      context and does its own locking. The returned handle has
 488  *      the usage count incremented and the caller must use dev_put() to
 489  *      release it when it is no longer needed. %NULL is returned if no
 490  *      matching device is found.
 491  */
 492
 493 struct net_device *dev_get_by_name(const char *name)
 494 {
 495         struct net_device *dev;
 496
 497         read_lock(&dev_base_lock);
 498         dev = __dev_get_by_name(name);
 499         if (dev)
 500                 dev_hold(dev);
 501         read_unlock(&dev_base_lock);
 502         return dev;
 503 }
 504
 505 /**
 506  *      __dev_get_by_index - find a device by its ifindex
 507  *      @ifindex: index of device
 508  *
 509  *      Search for an interface by index. Returns %NULL if the device
 510  *      is not found or a pointer to the device. The device has not
 511  *      had its reference counter increased so the caller must be careful
 512  *      about locking. The caller must hold either the RTNL semaphore
 513  *      or @dev_base_lock.
 514  */
 515
 516 struct net_device *__dev_get_by_index(int ifindex)
 517 {
 518         struct hlist_node *p;
 519
 520         hlist_for_each(p, dev_index_hash(ifindex)) {
 521                 struct net_device *dev
 522                         = hlist_entry(p, struct net_device, index_hlist);
 523                 if (dev->ifindex == ifindex)
 524                         return dev;
 525         }
 526         return NULL;
 527 }
 528
 529
 530 /**
 531  *      dev_get_by_index - find a device by its ifindex
 532  *      @ifindex: index of device
 533  *
 534  *      Search for an interface by index. Returns NULL if the device
 535  *      is not found or a pointer to the device. The device returned has
 536  *      had a reference added and the pointer is safe until the user calls
 537  *      dev_put to indicate they have finished with it.
 538  */
 539
 540 struct net_device *dev_get_by_index(int ifindex)
 541 {
 542         struct net_device *dev;
 543
 544         read_lock(&dev_base_lock);
 545         dev = __dev_get_by_index(ifindex);
 546         if (dev)
 547                 dev_hold(dev);
 548         read_unlock(&dev_base_lock);
 549         return dev;
 550 }
 551
 552 /**
 553  *      dev_getbyhwaddr - find a device by its hardware address
 554  *      @type: media type of device
 555  *      @ha: hardware address
 556  *
 557  *      Search for an interface by MAC address. Returns NULL if the device
 558  *      is not found or a pointer to the device. The caller must hold the
 559  *      rtnl semaphore. The returned device has not had its ref count increased
 560  *      and the caller must therefore be careful about locking
 561  *
 562  *      BUGS:
 563  *      If the API was consistent this would be __dev_get_by_hwaddr
 564  */
 565
 566 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 567 {
 568         struct net_device *dev;
 569
 570         ASSERT_RTNL();
 571
 572         for (dev = dev_base; dev; dev = dev->next)
 573                 if (dev->type == type &&
 574                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 575                         break;
 576         return dev;
 577 }
 578
 579 EXPORT_SYMBOL(dev_getbyhwaddr);
 580
 581 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 582 {
 583         struct net_device *dev;
 584
 585         rtnl_lock();
 586         for (dev = dev_base; dev; dev = dev->next) {
 587                 if (dev->type == type) {
 588                         dev_hold(dev);
 589                         break;
 590                 }
 591         }
 592         rtnl_unlock();
 593         return dev;
 594 }
 595
 596 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 597
 598 /**
 599  *      dev_get_by_flags - find any device with given flags
 600  *      @if_flags: IFF_* values
 601  *      @mask: bitmask of bits in if_flags to check
 602  *
 603  *      Search for any interface with the given flags. Returns NULL if a device
 604  *      is not found or a pointer to the device. The device returned has
 605  *      had a reference added and the pointer is safe until the user calls
 606  *      dev_put to indicate they have finished with it.
 607  */
 608
 609 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 610 {
 611         struct net_device *dev;
 612
 613         read_lock(&dev_base_lock);
 614         for (dev = dev_base; dev != NULL; dev = dev->next) {
 615                 if (((dev->flags ^ if_flags) & mask) == 0) {
 616                         dev_hold(dev);
 617                         break;
 618                 }
 619         }
 620         read_unlock(&dev_base_lock);
 621         return dev;
 622 }
 623
 624 /**
 625  *      dev_valid_name - check if name is okay for network device
 626  *      @name: name string
 627  *
 628  *      Network device names need to be valid file names to
 629  *      to allow sysfs to work
 630  */
 631 int dev_valid_name(const char *name)
 632 {
 633         return !(*name == '\0'
 634                  || !strcmp(name, ".")
 635                  || !strcmp(name, "..")
 636                  || strchr(name, '/'));
 637 }
 638
 639 /**
 640  *      dev_alloc_name - allocate a name for a device
 641  *      @dev: device
 642  *      @name: name format string
 643  *
 644  *      Passed a format string - eg "lt%d" it will try and find a suitable
 645  *      id. Not efficient for many devices, not called a lot. The caller
 646  *      must hold the dev_base or rtnl lock while allocating the name and
 647  *      adding the device in order to avoid duplicates. Returns the number
 648  *      of the unit assigned or a negative errno code.
 649  */
 650
 651 int dev_alloc_name(struct net_device *dev, const char *name)
 652 {
 653         int i = 0;
 654         char buf[IFNAMSIZ];
 655         const char *p;
 656         const int max_netdevices = 8*PAGE_SIZE;
 657         long *inuse;
 658         struct net_device *d;
 659
 660         p = strnchr(name, IFNAMSIZ-1, '%');
 661         if (p) {
 662                 /*
 663                  * Verify the string as this thing may have come from
 664                  * the user.  There must be either one "%d" and no other "%"
 665                  * characters.
 666                  */
 667                 if (p[1] != 'd' || strchr(p + 2, '%'))
 668                         return -EINVAL;
 669
 670                 /* Use one page as a bit array of possible slots */
 671                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 672                 if (!inuse)
 673                         return -ENOMEM;
 674
 675                 for (d = dev_base; d; d = d->next) {
 676                         if (!sscanf(d->name, name, &i))
 677                                 continue;
 678                         if (i < 0 || i >= max_netdevices)
 679                                 continue;
 680
 681                         /*  avoid cases where sscanf is not exact inverse of printf */
 682                         snprintf(buf, sizeof(buf), name, i);
 683                         if (!strncmp(buf, d->name, IFNAMSIZ))
 684                                 set_bit(i, inuse);
 685                 }
 686
 687                 i = find_first_zero_bit(inuse, max_netdevices);
 688                 free_page((unsigned long) inuse);
 689         }
 690
 691         snprintf(buf, sizeof(buf), name, i);
 692         if (!__dev_get_by_name(buf)) {
 693                 strlcpy(dev->name, buf, IFNAMSIZ);
 694                 return i;
 695         }
 696
 697         /* It is possible to run out of possible slots
 698          * when the name is long and there isn't enough space left
 699          * for the digits, or if all bits are used.
 700          */
 701         return -ENFILE;
 702 }
 703
 704
 705 /**
 706  *      dev_change_name - change name of a device
 707  *      @dev: device
 708  *      @newname: name (or format string) must be at least IFNAMSIZ
 709  *
 710  *      Change name of a device, can pass format strings "eth%d".
 711  *      for wildcarding.
 712  */
 713 int dev_change_name(struct net_device *dev, char *newname)
 714 {
 715         int err = 0;
 716
 717         ASSERT_RTNL();
 718
 719         if (dev->flags & IFF_UP)
 720                 return -EBUSY;
 721
 722         if (!dev_valid_name(newname))
 723                 return -EINVAL;
 724
 725         if (strchr(newname, '%')) {
 726                 err = dev_alloc_name(dev, newname);
 727                 if (err < 0)
 728                         return err;
 729                 strcpy(newname, dev->name);
 730         }
 731         else if (__dev_get_by_name(newname))
 732                 return -EEXIST;
 733         else
 734                 strlcpy(dev->name, newname, IFNAMSIZ);
 735
 736         err = class_device_rename(&dev->class_dev, dev->name);
 737         if (!err) {
 738                 hlist_del(&dev->name_hlist);
 739                 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 740                 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 741         }
 742
 743         return err;
 744 }
 745
 746 /**
 747  *      netdev_features_change - device changes fatures
 748  *      @dev: device to cause notification
 749  *
 750  *      Called to indicate a device has changed features.
 751  */
 752 void netdev_features_change(struct net_device *dev)
 753 {
 754         notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 755 }
 756 EXPORT_SYMBOL(netdev_features_change);
 757
 758 /**
 759  *      netdev_state_change - device changes state
 760  *      @dev: device to cause notification
 761  *
 762  *      Called to indicate a device has changed state. This function calls
 763  *      the notifier chains for netdev_chain and sends a NEWLINK message
 764  *      to the routing socket.
 765  */
 766 void netdev_state_change(struct net_device *dev)
 767 {
 768         if (dev->flags & IFF_UP) {
 769                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 770                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 771         }
 772 }
 773
 774 /**
 775  *      dev_load        - load a network module
 776  *      @name: name of interface
 777  *
 778  *      If a network interface is not present and the process has suitable
 779  *      privileges this function loads the module. If module loading is not
 780  *      available in this kernel then it becomes a nop.
 781  */
 782
 783 void dev_load(const char *name)
 784 {
 785         struct net_device *dev;
 786
 787         read_lock(&dev_base_lock);
 788         dev = __dev_get_by_name(name);
 789         read_unlock(&dev_base_lock);
 790
 791         if (!dev && capable(CAP_SYS_MODULE))
 792                 request_module("%s", name);
 793 }
 794
 795 static int default_rebuild_header(struct sk_buff *skb)
 796 {
 797         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 798                skb->dev ? skb->dev->name : "NULL!!!");
 799         kfree_skb(skb);
 800         return 1;
 801 }
 802
 803
 804 /**
 805  *      dev_open        - prepare an interface for use.
 806  *      @dev:   device to open
 807  *
 808  *      Takes a device from down to up state. The device's private open
 809  *      function is invoked and then the multicast lists are loaded. Finally
 810  *      the device is moved into the up state and a %NETDEV_UP message is
 811  *      sent to the netdev notifier chain.
 812  *
 813  *      Calling this function on an active interface is a nop. On a failure
 814  *      a negative errno code is returned.
 815  */
 816 int dev_open(struct net_device *dev)
 817 {
 818         int ret = 0;
 819
 820         /*
 821          *      Is it already up?
 822          */
 823
 824         if (dev->flags & IFF_UP)
 825                 return 0;
 826
 827         /*
 828          *      Is it even present?
 829          */
 830         if (!netif_device_present(dev))
 831                 return -ENODEV;
 832
 833         /*
 834          *      Call device private open method
 835          */
 836         set_bit(__LINK_STATE_START, &dev->state);
 837         if (dev->open) {
 838                 ret = dev->open(dev);
 839                 if (ret)
 840                         clear_bit(__LINK_STATE_START, &dev->state);
 841         }
 842
 843         /*
 844          *      If it went open OK then:
 845          */
 846
 847         if (!ret) {
 848                 /*
 849                  *      Set the flags.
 850                  */
 851                 dev->flags |= IFF_UP;
 852
 853                 /*
 854                  *      Initialize multicasting status
 855                  */
 856                 dev_mc_upload(dev);
 857
 858                 /*
 859                  *      Wakeup transmit queue engine
 860                  */
 861                 dev_activate(dev);
 862
 863                 /*
 864                  *      ... and announce new interface.
 865                  */
 866                 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 867         }
 868         return ret;
 869 }
 870
 871 /**
 872  *      dev_close - shutdown an interface.
 873  *      @dev: device to shutdown
 874  *
 875  *      This function moves an active device into down state. A
 876  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 877  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 878  *      chain.
 879  */
 880 int dev_close(struct net_device *dev)
 881 {
 882         if (!(dev->flags & IFF_UP))
 883                 return 0;
 884
 885         /*
 886          *      Tell people we are going down, so that they can
 887          *      prepare to death, when device is still operating.
 888          */
 889         notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 890
 891         dev_deactivate(dev);
 892
 893         clear_bit(__LINK_STATE_START, &dev->state);
 894
 895         /* Synchronize to scheduled poll. We cannot touch poll list,
 896          * it can be even on different cpu. So just clear netif_running(),
 897          * and wait when poll really will happen. Actually, the best place
 898          * for this is inside dev->stop() after device stopped its irq
 899          * engine, but this requires more changes in devices. */
 900
 901         smp_mb__after_clear_bit(); /* Commit netif_running(). */
 902         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 903                 /* No hurry. */
 904                 msleep(1);
 905         }
 906
 907         /*
 908          *      Call the device specific close. This cannot fail.
 909          *      Only if device is UP
 910          *
 911          *      We allow it to be called even after a DETACH hot-plug
 912          *      event.
 913          */
 914         if (dev->stop)
 915                 dev->stop(dev);
 916
 917         /*
 918          *      Device is now down.
 919          */
 920
 921         dev->flags &= ~IFF_UP;
 922
 923         /*
 924          * Tell people we are down
 925          */
 926         notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
 927
 928         return 0;
 929 }
 930
 931
 932 /*
 933  *      Device change register/unregister. These are not inline or static
 934  *      as we export them to the world.
 935  */
 936
 937 /**
 938  *      register_netdevice_notifier - register a network notifier block
 939  *      @nb: notifier
 940  *
 941  *      Register a notifier to be called when network device events occur.
 942  *      The notifier passed is linked into the kernel structures and must
 943  *      not be reused until it has been unregistered. A negative errno code
 944  *      is returned on a failure.
 945  *
 946  *      When registered all registration and up events are replayed
 947  *      to the new notifier to allow device to have a race free
 948  *      view of the network device list.
 949  */
 950
 951 int register_netdevice_notifier(struct notifier_block *nb)
 952 {
 953         struct net_device *dev;
 954         int err;
 955
 956         rtnl_lock();
 957         err = notifier_chain_register(&netdev_chain, nb);
 958         if (!err) {
 959                 for (dev = dev_base; dev; dev = dev->next) {
 960                         nb->notifier_call(nb, NETDEV_REGISTER, dev);
 961
 962                         if (dev->flags & IFF_UP)
 963                                 nb->notifier_call(nb, NETDEV_UP, dev);
 964                 }
 965         }
 966         rtnl_unlock();
 967         return err;
 968 }
 969
 970 /**
 971  *      unregister_netdevice_notifier - unregister a network notifier block
 972  *      @nb: notifier
 973  *
 974  *      Unregister a notifier previously registered by
 975  *      register_netdevice_notifier(). The notifier is unlinked into the
 976  *      kernel structures and may then be reused. A negative errno code
 977  *      is returned on a failure.
 978  */
 979
 980 int unregister_netdevice_notifier(struct notifier_block *nb)
 981 {
 982         return notifier_chain_unregister(&netdev_chain, nb);
 983 }
 984
 985 /**
 986  *      call_netdevice_notifiers - call all network notifier blocks
 987  *      @val: value passed unmodified to notifier function
 988  *      @v:   pointer passed unmodified to notifier function
 989  *
 990  *      Call all network notifier blocks.  Parameters and return value
 991  *      are as for notifier_call_chain().
 992  */
 993
 994 int call_netdevice_notifiers(unsigned long val, void *v)
 995 {
 996         return notifier_call_chain(&netdev_chain, val, v);
 997 }
 998
 999 /* When > 0 there are consumers of rx skb time stamps */
1000 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1001
1002 void net_enable_timestamp(void)
1003 {
1004         atomic_inc(&netstamp_needed);
1005 }
1006
1007 void net_disable_timestamp(void)
1008 {
1009         atomic_dec(&netstamp_needed);
1010 }
1011
1012 void __net_timestamp(struct sk_buff *skb)
1013 {
1014         struct timeval tv;
1015
1016         do_gettimeofday(&tv);
1017         skb_set_timestamp(skb, &tv);
1018 }
1019 EXPORT_SYMBOL(__net_timestamp);
1020
1021 static inline void net_timestamp(struct sk_buff *skb)
1022 {
1023         if (atomic_read(&netstamp_needed))
1024                 __net_timestamp(skb);
1025         else {
1026                 skb->tstamp.off_sec = 0;
1027                 skb->tstamp.off_usec = 0;
1028         }
1029 }
1030
1031 /*
1032  *      Support routine. Sends outgoing frames to any network
1033  *      taps currently in use.
1034  */
1035
1036 #if !((defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)))
1037 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,12)
1038 static
1039 #endif
1040 #endif
1041 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1042 {
1043         struct packet_type *ptype;
1044
1045         net_timestamp(skb);
1046
1047         rcu_read_lock();
1048         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1049                 /* Never send packets back to the socket
1050                  * they originated from - MvS (miquels@drinkel.ow.org)
1051                  */
1052                 if ((ptype->dev == dev || !ptype->dev) &&
1053                     (ptype->af_packet_priv == NULL ||
1054                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1055                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1056                         if (!skb2)
1057                                 break;
1058
1059                         /* skb->nh should be correctly
1060                            set by sender, so that the second statement is
1061                            just protection against buggy protocols.
1062                          */
1063                         skb2->mac.raw = skb2->data;
1064
1065                         if (skb2->nh.raw < skb2->data ||
1066                             skb2->nh.raw > skb2->tail) {
1067                                 if (net_ratelimit())
1068                                         printk(KERN_CRIT "protocol %04x is "
1069                                                "buggy, dev %s\n",
1070                                                skb2->protocol, dev->name);
1071                                 skb2->nh.raw = skb2->data;
1072                         }
1073
1074                         skb2->h.raw = skb2->nh.raw;
1075                         skb2->pkt_type = PACKET_OUTGOING;
1076                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1077                 }
1078         }
1079         rcu_read_unlock();
1080 }
1081
1082 /*
1083  * Invalidate hardware checksum when packet is to be mangled, and
1084  * complete checksum manually on outgoing path.
1085  */
1086 int skb_checksum_help(struct sk_buff *skb, int inward)
1087 {
1088         unsigned int csum;
1089         int ret = 0, offset = skb->h.raw - skb->data;
1090
1091         if (inward) {
1092                 skb->ip_summed = CHECKSUM_NONE;
1093                 goto out;
1094         }
1095
1096         if (skb_cloned(skb)) {
1097                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1098                 if (ret)
1099                         goto out;
1100         }
1101
1102         BUG_ON(offset > (int)skb->len);
1103         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1104
1105         offset = skb->tail - skb->h.raw;
1106         BUG_ON(offset <= 0);
1107         BUG_ON(skb->csum + 2 > offset);
1108
1109         *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1110         skb->ip_summed = CHECKSUM_NONE;
1111 out:
1112         return ret;
1113 }
1114
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate-limited error naming the device ("<unknown>" when @dev is
 * NULL) and dump the stack. Only built when CONFIG_BUG is set.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
		dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1127
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 when @dev lacks NETIF_F_HIGHDMA and at least one page
 * fragment of @skb is a highmem page, 0 otherwise. Without
 * CONFIG_HIGHMEM this is a constant 0.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int i;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
				return 1;
		}
	}

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif
1150
1151 /* Keep head the same: replace data */
1152 int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
1153 {
1154         unsigned int size;
1155         u8 *data;
1156         long offset;
1157         struct skb_shared_info *ninfo;
1158         int headerlen = skb->data - skb->head;
1159         int expand = (skb->tail + skb->data_len) - skb->end;
1160
1161         if (skb_shared(skb))
1162                 BUG();
1163
1164         if (expand <= 0)
1165                 expand = 0;
1166
1167         size = skb->end - skb->head + expand;
1168         size = SKB_DATA_ALIGN(size);
1169         data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1170         if (!data)
1171                 return -ENOMEM;
1172
1173         /* Copy entire thing */
1174         if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1175                 BUG();
1176
1177         /* Set up shinfo */
1178         ninfo = (struct skb_shared_info*)(data + size);
1179         atomic_set(&ninfo->dataref, 1);
1180         ninfo->tso_size = skb_shinfo(skb)->tso_size;
1181         ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1182         ninfo->ufo_size = skb_shinfo(skb)->ufo_size;
1183         ninfo->nr_frags = 0;
1184         ninfo->frag_list = NULL;
1185
1186         /* Offset between the two in bytes */
1187         offset = data - skb->head;
1188
1189         /* Free old data. */
1190         skb_release_data(skb);
1191
1192         skb->head = data;
1193         skb->end  = data + size;
1194
1195         /* Set up new pointers */
1196         skb->h.raw   += offset;
1197         skb->nh.raw  += offset;
1198         skb->mac.raw += offset;
1199         skb->tail    += offset;
1200         skb->data    += offset;
1201
1202         /* We are no longer a clone, even if we were. */
1203         skb->cloned    = 0;
1204
1205         skb->tail     += skb->data_len;
1206         skb->data_len  = 0;
1207         return 0;
1208 }
1209
/*
 * Take / release the device xmit_lock around a driver transmit call,
 * unless the device sets NETIF_F_LLTX, in which case neither macro
 * does anything. HARD_TX_LOCK records @cpu as the lock owner;
 * HARD_TX_UNLOCK resets the owner to -1 before dropping the lock.
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement (safe in an unbraced if/else), and their arguments
 * are parenthesized. Note that @dev is still evaluated more than once.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1223
1224 /**
1225  *      dev_queue_xmit - transmit a buffer
1226  *      @skb: buffer to transmit
1227  *
1228  *      Queue a buffer for transmission to a network device. The caller must
1229  *      have set the device and priority and built the buffer before calling
1230  *      this function. The function can be called from an interrupt.
1231  *
1232  *      A negative errno code is returned on a failure. A success does not
1233  *      guarantee the frame will be transmitted as it may be dropped due
1234  *      to congestion or traffic shaping.
1235  *
1236  * -----------------------------------------------------------------------------------
1237  *      I notice this method can also return errors from the queue disciplines,
1238  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1239  *      be positive.
1240  *
1241  *      Regardless of the return value, the skb is consumed, so it is currently
1242  *      difficult to retry a send to this method.  (You can bump the ref count
1243  *      before sending to hold a reference for retry if you are careful.)
1244  *
1245  *      When calling this method, interrupts MUST be enabled.  This is because
1246  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1247  *          --BLG
1248  */
1249
1250 int dev_queue_xmit(struct sk_buff *skb)
1251 {
1252         struct net_device *dev = skb->dev;
1253         struct Qdisc *q;
1254         int rc = -ENOMEM;
1255
1256         if (skb_shinfo(skb)->frag_list &&
1257             !(dev->features & NETIF_F_FRAGLIST) &&
1258             __skb_linearize(skb, GFP_ATOMIC))
1259                 goto out_kfree_skb;
1260
1261         /* Fragmented skb is linearized if device does not support SG,
1262          * or if at least one of fragments is in highmem and device
1263          * does not support DMA from it.
1264          */
1265         if (skb_shinfo(skb)->nr_frags &&
1266             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1267             __skb_linearize(skb, GFP_ATOMIC))
1268                 goto out_kfree_skb;
1269
1270         /* If packet is not checksummed and device does not support
1271          * checksumming for this protocol, complete checksumming here.
1272          */
1273         if (skb->ip_summed == CHECKSUM_HW &&
1274             (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1275              (!(dev->features & NETIF_F_IP_CSUM) ||
1276               skb->protocol != htons(ETH_P_IP))))
1277                 if (skb_checksum_help(skb, 0))
1278                         goto out_kfree_skb;
1279
1280         spin_lock_prefetch(&dev->queue_lock);
1281
1282         /* Disable soft irqs for various locks below. Also
1283          * stops preemption for RCU.
1284          */
1285         local_bh_disable();
1286
1287         /* Updates of qdisc are serialized by queue_lock.
1288          * The struct Qdisc which is pointed to by qdisc is now a
1289          * rcu structure - it may be accessed without acquiring
1290          * a lock (but the structure may be stale.) The freeing of the
1291          * qdisc will be deferred until it's known that there are no
1292          * more references to it.
1293          *
1294          * If the qdisc has an enqueue function, we still need to
1295          * hold the queue_lock before calling it, since queue_lock
1296          * also serializes access to the device queue.
1297          */
1298
1299         q = rcu_dereference(dev->qdisc);
1300 #ifdef CONFIG_NET_CLS_ACT
1301         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1302 #endif
1303         if (q->enqueue) {
1304                 /* Grab device queue */
1305                 spin_lock(&dev->queue_lock);
1306                 q = dev->qdisc;
1307                 if (q->enqueue) {
1308                         rc = q->enqueue(skb, q);
1309                         qdisc_run(dev);
1310                         spin_unlock(&dev->queue_lock);
1311
1312                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1313                         goto out;
1314                 }
1315                 spin_unlock(&dev->queue_lock);
1316         }
1317
1318         /* The device has no queue. Common case for software devices:
1319            loopback, all the sorts of tunnels...
1320
1321            Really, it is unlikely that xmit_lock protection is necessary here.
1322            (f.e. loopback and IP tunnels are clean ignoring statistics
1323            counters.)
1324            However, it is possible, that they rely on protection
1325            made by us here.
1326
1327            Check this and shot the lock. It is not prone from deadlocks.
1328            Either shot noqueue qdisc, it is even simpler 8)
1329          */
1330         if (dev->flags & IFF_UP) {
1331                 int cpu = smp_processor_id(); /* ok because BHs are off */
1332
1333                 if (dev->xmit_lock_owner != cpu) {
1334
1335                         HARD_TX_LOCK(dev, cpu);
1336
1337                         if (!netif_queue_stopped(dev)) {
1338                                 if (netdev_nit)
1339                                         dev_queue_xmit_nit(skb, dev);
1340
1341                                 rc = 0;
1342                                 if (!dev->hard_start_xmit(skb, dev)) {
1343                                         HARD_TX_UNLOCK(dev);
1344                                         goto out;
1345                                 }
1346                         }
1347                         HARD_TX_UNLOCK(dev);
1348                         if (net_ratelimit())
1349                                 printk(KERN_CRIT "Virtual device %s asks to "
1350                                        "queue packet!\n", dev->name);
1351                 } else {
1352                         /* Recursion is detected! It is possible,
1353                          * unfortunately */
1354                         if (net_ratelimit())
1355                                 printk(KERN_CRIT "Dead loop on virtual device "
1356                                        "%s, fix it urgently!\n", dev->name);
1357                 }
1358         }
1359
1360         rc = -ENETDOWN;
1361         local_bh_enable();
1362
1363 out_kfree_skb:
1364         kfree_skb(skb);
1365         return rc;
1366 out:
1367         local_bh_enable();
1368         return rc;
1369 }
1370
1371
1372 /*=======================================================================
1373                         Receiver routines
1374   =======================================================================*/
1375
1376 int netdev_max_backlog = 1000;
1377 int netdev_budget = 300;
1378 int weight_p = 64;            /* old backlog weight */
1379
1380 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1381
1382
1383 /**
1384  *      netif_rx        -       post buffer to the network code
1385  *      @skb: buffer to post
1386  *
1387  *      This function receives a packet from a device driver and queues it for
1388  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1389  *      may be dropped during processing for congestion control or by the
1390  *      protocol layers.
1391  *
1392  *      return values:
1393  *      NET_RX_SUCCESS  (no congestion)
1394  *      NET_RX_CN_LOW   (low congestion)
1395  *      NET_RX_CN_MOD   (moderate congestion)
1396  *      NET_RX_CN_HIGH  (high congestion)
1397  *      NET_RX_DROP     (packet was dropped)
1398  *
1399  */
1400
1401 int netif_rx(struct sk_buff *skb)
1402 {
1403         struct softnet_data *queue;
1404         unsigned long flags;
1405
1406         /* if netpoll wants it, pretend we never saw it */
1407         if (netpoll_rx(skb))
1408                 return NET_RX_DROP;
1409
1410         if (!skb->tstamp.off_sec)
1411                 net_timestamp(skb);
1412
1413         /*
1414          * The code is rearranged so that the path is the most
1415          * short when CPU is congested, but is still operating.
1416          */
1417         local_irq_save(flags);
1418         queue = &__get_cpu_var(softnet_data);
1419
1420         __get_cpu_var(netdev_rx_stat).total++;
1421         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1422                 if (queue->input_pkt_queue.qlen) {
1423 enqueue:
1424                         dev_hold(skb->dev);
1425                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1426                         local_irq_restore(flags);
1427                         return NET_RX_SUCCESS;
1428                 }
1429
1430                 netif_rx_schedule(&queue->backlog_dev);
1431                 goto enqueue;
1432         }
1433
1434         __get_cpu_var(netdev_rx_stat).dropped++;
1435         local_irq_restore(flags);
1436
1437         kfree_skb(skb);
1438         return NET_RX_DROP;
1439 }
1440
/*
 * Calls netif_rx() with preemption disabled and then runs any pending
 * softirqs before preemption is enabled again. Returns what netif_rx()
 * returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();
	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1455
1456 static inline struct net_device *skb_bond(struct sk_buff *skb)
1457 {
1458         struct net_device *dev = skb->dev;
1459
1460         if (dev->master)
1461                 skb->dev = dev->master;
1462
1463         return dev;
1464 }
1465
1466 static void net_tx_action(struct softirq_action *h)
1467 {
1468         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1469
1470         if (sd->completion_queue) {
1471                 struct sk_buff *clist;
1472
1473                 local_irq_disable();
1474                 clist = sd->completion_queue;
1475                 sd->completion_queue = NULL;
1476                 local_irq_enable();
1477
1478                 while (clist) {
1479                         struct sk_buff *skb = clist;
1480                         clist = clist->next;
1481
1482                         BUG_TRAP(!atomic_read(&skb->users));
1483                         __kfree_skb(skb);
1484                 }
1485         }
1486
1487         if (sd->output_queue) {
1488                 struct net_device *head;
1489
1490                 local_irq_disable();
1491                 head = sd->output_queue;
1492                 sd->output_queue = NULL;
1493                 local_irq_enable();
1494
1495                 while (head) {
1496                         struct net_device *dev = head;
1497                         head = head->next_sched;
1498
1499                         smp_mb__before_clear_bit();
1500                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1501
1502                         if (spin_trylock(&dev->queue_lock)) {
1503                                 qdisc_run(dev);
1504                                 spin_unlock(&dev->queue_lock);
1505                         } else {
1506                                 netif_schedule(dev);
1507                         }
1508                 }
1509         }
1510 }
1511
1512 static __inline__ int deliver_skb(struct sk_buff *skb,
1513                                   struct packet_type *pt_prev,
1514                                   struct net_device *orig_dev)
1515 {
1516         atomic_inc(&skb->users);
1517         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1518 }
1519
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hook pointers defined here for the bridge code; only
 * br_handle_frame_hook is called from this file's visible code. */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Pass a received buffer to the bridge hook when its device is a bridge
 * port. Loopback packets and devices without a br_port are left alone
 * (returns 0). Otherwise a pending handler in *pt_prev is served first
 * (its result goes to *ret and *pt_prev is cleared), and the return
 * value of br_handle_frame_hook() is passed back.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct sk_buff *skb = *pskb;
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif
1547
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 * Runs @skb through the ingress qdisc of its device, if there is one.
 * Returns TC_ACT_OK when no ingress qdisc is attached, TC_ACT_SHOT when
 * the redirect counter kept in tc_verd already exceeds MAX_RED_LOOP,
 * and otherwise the result of the qdisc's enqueue method.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl) {
		printk("Redir loop detected Dropping packet (%s->%s)\n",
			skb->input_dev->name, skb->dev->name);
		return TC_ACT_SHOT;
	}
	ttl++;

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	/* Look at the ingress qdisc again, this time under ingress_lock. */
	spin_lock(&dev->ingress_lock);
	q = dev->qdisc_ingress;
	if (q != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1585
1586 int netif_receive_skb(struct sk_buff *skb)
1587 {
1588         struct packet_type *ptype, *pt_prev;
1589         struct net_device *orig_dev;
1590         int ret = NET_RX_DROP;
1591         unsigned short type;
1592
1593         /* if we've gotten here through NAPI, check netpoll */
1594         if (skb->dev->poll && netpoll_rx(skb))
1595                 return NET_RX_DROP;
1596
1597         if (!skb->tstamp.off_sec)
1598                 net_timestamp(skb);
1599
1600         if (!skb->input_dev)
1601                 skb->input_dev = skb->dev;
1602
1603         orig_dev = skb_bond(skb);
1604
1605         __get_cpu_var(netdev_rx_stat).total++;
1606
1607         skb->h.raw = skb->nh.raw = skb->data;
1608         skb->mac_len = skb->nh.raw - skb->mac.raw;
1609
1610         pt_prev = NULL;
1611
1612         rcu_read_lock();
1613
1614 #ifdef CONFIG_NET_CLS_ACT
1615         if (skb->tc_verd & TC_NCLS) {
1616                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1617                 goto ncls;
1618         }
1619 #endif
1620
1621         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1622                 if (!ptype->dev || ptype->dev == skb->dev) {
1623                         if (pt_prev)
1624                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1625                         pt_prev = ptype;
1626                 }
1627         }
1628
1629 #ifdef CONFIG_NET_CLS_ACT
1630         if (pt_prev) {
1631                 ret = deliver_skb(skb, pt_prev, orig_dev);
1632                 pt_prev = NULL; /* noone else should process this after*/
1633         } else {
1634                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1635         }
1636
1637         ret = ing_filter(skb);
1638
1639         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1640                 kfree_skb(skb);
1641                 goto out;
1642         }
1643
1644         skb->tc_verd = 0;
1645 ncls:
1646 #endif
1647
1648         handle_diverter(skb);
1649
1650         if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1651                 goto out;
1652
1653         type = skb->protocol;
1654         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1655                 if (ptype->type == type &&
1656                     (!ptype->dev || ptype->dev == skb->dev)) {
1657                         if (pt_prev)
1658                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1659                         pt_prev = ptype;
1660                 }
1661         }
1662
1663         if (pt_prev) {
1664                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1665         } else {
1666                 kfree_skb(skb);
1667                 /* Jamal, now you will not able to escape explaining
1668                  * me how you were going to use this. :-)
1669                  */
1670                 ret = NET_RX_DROP;
1671         }
1672
1673 out:
1674         rcu_read_unlock();
1675         return ret;
1676 }
1677
1678 static int process_backlog(struct net_device *backlog_dev, int *budget)
1679 {
1680         int work = 0;
1681         int quota = min(backlog_dev->quota, *budget);
1682         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1683         unsigned long start_time = jiffies;
1684
1685         backlog_dev->weight = weight_p;
1686         for (;;) {
1687                 struct sk_buff *skb;
1688                 struct net_device *dev;
1689
1690                 local_irq_disable();
1691                 skb = __skb_dequeue(&queue->input_pkt_queue);
1692                 if (!skb)
1693                         goto job_done;
1694                 local_irq_enable();
1695
1696                 dev = skb->dev;
1697
1698                 netif_receive_skb(skb);
1699
1700                 dev_put(dev);
1701
1702                 work++;
1703
1704                 if (work >= quota || jiffies - start_time > 1)
1705                         break;
1706
1707         }
1708
1709         backlog_dev->quota -= work;
1710         *budget -= work;
1711         return -1;
1712
1713 job_done:
1714         backlog_dev->quota -= work;
1715         *budget -= work;
1716
1717         list_del(&backlog_dev->poll_list);
1718         smp_mb__before_clear_bit();
1719         netif_poll_enable(backlog_dev);
1720
1721         local_irq_enable();
1722         return 0;
1723 }
1724
1725 static void net_rx_action(struct softirq_action *h)
1726 {
1727         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1728         unsigned long start_time = jiffies;
1729         int budget = netdev_budget;
1730         void *have;
1731
1732         local_irq_disable();
1733
1734         while (!list_empty(&queue->poll_list)) {
1735                 struct net_device *dev;
1736
1737                 if (budget <= 0 || jiffies - start_time > 1)
1738                         goto softnet_break;
1739
1740                 local_irq_enable();
1741
1742                 dev = list_entry(queue->poll_list.next,
1743                                  struct net_device, poll_list);
1744                 have = netpoll_poll_lock(dev);
1745
1746                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1747                         netpoll_poll_unlock(have);
1748                         local_irq_disable();
1749                         list_del(&dev->poll_list);
1750                         list_add_tail(&dev->poll_list, &queue->poll_list);
1751                         if (dev->quota < 0)
1752                                 dev->quota += dev->weight;
1753                         else
1754                                 dev->quota = dev->weight;
1755                 } else {
1756                         netpoll_poll_unlock(have);
1757                         dev_put(dev);
1758                         local_irq_disable();
1759                 }
1760         }
1761 out:
1762         local_irq_enable();
1763         return;
1764
1765 softnet_break:
1766         __get_cpu_var(netdev_rx_stat).time_squeeze++;
1767         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1768         goto out;
1769 }
1770
1771 static gifconf_func_t * gifconf_list [NPROTO];
1772
1773 /**
1774  *      register_gifconf        -       register a SIOCGIF handler
1775  *      @family: Address family
1776  *      @gifconf: Function handler
1777  *
1778  *      Register protocol dependent address dumping routines. The handler
1779  *      that is passed must not be freed or reused until it has been replaced
1780  *      by another handler.
1781  */
1782 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1783 {
1784         if (family >= NPROTO)
1785                 return -EINVAL;
1786         gifconf_list[family] = gifconf;
1787         return 0;
1788 }
1789
1790
1791 /*
1792  *      Map an interface index to its name (SIOCGIFNAME)
1793  */
1794
1795 /*
1796  *      We need this ioctl for efficient implementation of the
1797  *      if_indextoname() function required by the IPv6 API.  Without
1798  *      it, we would have to search all the interfaces to find a
1799  *      match.  --pb
1800  */
1801
1802 static int dev_ifname(struct ifreq __user *arg)
1803 {
1804         struct net_device *dev;
1805         struct ifreq ifr;
1806
1807         /*
1808          *      Fetch the caller's info block.
1809          */
1810
1811         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1812                 return -EFAULT;
1813
1814         read_lock(&dev_base_lock);
1815         dev = __dev_get_by_index(ifr.ifr_ifindex);
1816         if (!dev) {
1817                 read_unlock(&dev_base_lock);
1818                 return -ENODEV;
1819         }
1820
1821         strcpy(ifr.ifr_name, dev->name);
1822         read_unlock(&dev_base_lock);
1823
1824         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1825                 return -EFAULT;
1826         return 0;
1827 }
1828
1829 /*
1830  *      Perform a SIOCGIFCONF call. This structure will change
1831  *      size eventually, and there is nothing I can do about it.
1832  *      Thus we will need a 'compatibility mode'.
1833  */
1834
1835 static int dev_ifconf(char __user *arg)
1836 {
1837         struct ifconf ifc;
1838         struct net_device *dev;
1839         char __user *pos;
1840         int len;
1841         int total;
1842         int i;
1843
1844         /*
1845          *      Fetch the caller's info block.
1846          */
1847
1848         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1849                 return -EFAULT;
1850
1851         pos = ifc.ifc_buf;
1852         len = ifc.ifc_len;
1853
1854         /*
1855          *      Loop over the interfaces, and write an info block for each.
1856          */
1857
1858         total = 0;
1859         for (dev = dev_base; dev; dev = dev->next) {
1860                 if (vx_flags(VXF_HIDE_NETIF, 0) &&
1861                         !dev_in_nx_info(dev, current->nx_info))
1862                         continue;
1863                 for (i = 0; i < NPROTO; i++) {
1864                         if (gifconf_list[i]) {
1865                                 int done;
1866                                 if (!pos)
1867                                         done = gifconf_list[i](dev, NULL, 0);
1868                                 else
1869                                         done = gifconf_list[i](dev, pos + total,
1870                                                                len - total);
1871                                 if (done < 0)
1872                                         return -EFAULT;
1873                                 total += done;
1874                         }
1875                 }
1876         }
1877
1878         /*
1879          *      All done.  Write the updated control block back to the caller.
1880          */
1881         ifc.ifc_len = total;
1882
1883         /*
1884          *      Both BSD and Solaris return 0 here, so we do too.
1885          */
1886         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1887 }
1888
1889 #ifdef CONFIG_PROC_FS
1890 /*
1891  *      This is invoked by the /proc filesystem handler to display a device
1892  *      in detail.
1893  */
1894 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1895 {
1896         struct net_device *dev;
1897         loff_t i;
1898
1899         for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1900
1901         return i == pos ? dev : NULL;
1902 }
1903
1904 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1905 {
1906         read_lock(&dev_base_lock);
1907         return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1908 }
1909
1910 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1911 {
1912         ++*pos;
1913         return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1914 }
1915
1916 void dev_seq_stop(struct seq_file *seq, void *v)
1917 {
1918         read_unlock(&dev_base_lock);
1919 }
1920
1921 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1922 {
1923         struct nx_info *nxi = current->nx_info;
1924
1925         if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
1926                 return;
1927         if (dev->get_stats) {
1928                 struct net_device_stats *stats = dev->get_stats(dev);
1929
1930                 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1931                                 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1932                            dev->name, stats->rx_bytes, stats->rx_packets,
1933                            stats->rx_errors,
1934                            stats->rx_dropped + stats->rx_missed_errors,
1935                            stats->rx_fifo_errors,
1936                            stats->rx_length_errors + stats->rx_over_errors +
1937                              stats->rx_crc_errors + stats->rx_frame_errors,
1938                            stats->rx_compressed, stats->multicast,
1939                            stats->tx_bytes, stats->tx_packets,
1940                            stats->tx_errors, stats->tx_dropped,
1941                            stats->tx_fifo_errors, stats->collisions,
1942                            stats->tx_carrier_errors +
1943                              stats->tx_aborted_errors +
1944                              stats->tx_window_errors +
1945                              stats->tx_heartbeat_errors,
1946                            stats->tx_compressed);
1947         } else
1948                 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1949 }
1950
1951 /*
1952  *      Called from the PROCfs module. This now uses the new arbitrary sized
1953  *      /proc/net interface to create /proc/net/dev
1954  */
1955 static int dev_seq_show(struct seq_file *seq, void *v)
1956 {
1957         if (v == SEQ_START_TOKEN)
1958                 seq_puts(seq, "Inter-|   Receive                            "
1959                               "                    |  Transmit\n"
1960                               " face |bytes    packets errs drop fifo frame "
1961                               "compressed multicast|bytes    packets errs "
1962                               "drop fifo colls carrier compressed\n");
1963         else
1964                 dev_seq_printf_stats(seq, v);
1965         return 0;
1966 }
1967
1968 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1969 {
1970         struct netif_rx_stats *rc = NULL;
1971
1972         while (*pos < NR_CPUS)
1973                 if (cpu_online(*pos)) {
1974                         rc = &per_cpu(netdev_rx_stat, *pos);
1975                         break;
1976                 } else
1977                         ++*pos;
1978         return rc;
1979 }
1980
1981 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1982 {
1983         return softnet_get_online(pos);
1984 }
1985
1986 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987 {
1988         ++*pos;
1989         return softnet_get_online(pos);
1990 }
1991
/* seq_file ->stop: softnet_seq_start() takes no lock, so nothing to undo. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
1995
1996 static int softnet_seq_show(struct seq_file *seq, void *v)
1997 {
1998         struct netif_rx_stats *s = v;
1999
2000         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2001                    s->total, s->dropped, s->time_squeeze, 0,
2002                    0, 0, 0, 0, /* was fastroute */
2003                    s->cpu_collision );
2004         return 0;
2005 }
2006
2007 static struct seq_operations dev_seq_ops = {
2008         .start = dev_seq_start,
2009         .next  = dev_seq_next,
2010         .stop  = dev_seq_stop,
2011         .show  = dev_seq_show,
2012 };
2013
2014 static int dev_seq_open(struct inode *inode, struct file *file)
2015 {
2016         return seq_open(file, &dev_seq_ops);
2017 }
2018
2019 static struct file_operations dev_seq_fops = {
2020         .owner   = THIS_MODULE,
2021         .open    = dev_seq_open,
2022         .read    = seq_read,
2023         .llseek  = seq_lseek,
2024         .release = seq_release,
2025 };
2026
2027 static struct seq_operations softnet_seq_ops = {
2028         .start = softnet_seq_start,
2029         .next  = softnet_seq_next,
2030         .stop  = softnet_seq_stop,
2031         .show  = softnet_seq_show,
2032 };
2033
2034 static int softnet_seq_open(struct inode *inode, struct file *file)
2035 {
2036         return seq_open(file, &softnet_seq_ops);
2037 }
2038
2039 static struct file_operations softnet_seq_fops = {
2040         .owner   = THIS_MODULE,
2041         .open    = softnet_seq_open,
2042         .read    = seq_read,
2043         .llseek  = seq_lseek,
2044         .release = seq_release,
2045 };
2046
/*
 * Without WIRELESS_EXT there is nothing to register; the stub expands
 * to 0, which dev_proc_init() treats as success.
 */
#ifndef WIRELESS_EXT
#define wireless_proc_init() 0
#else
extern int wireless_proc_init(void);
#endif
2052
2053 static int __init dev_proc_init(void)
2054 {
2055         int rc = -ENOMEM;
2056
2057         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2058                 goto out;
2059         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2060                 goto out_dev;
2061         if (wireless_proc_init())
2062                 goto out_softnet;
2063         rc = 0;
2064 out:
2065         return rc;
2066 out_softnet:
2067         proc_net_remove("softnet_stat");
2068 out_dev:
2069         proc_net_remove("dev");
2070         goto out;
2071 }
2072 #else
2073 #define dev_proc_init() 0
2074 #endif  /* CONFIG_PROC_FS */
2075
2076
2077 /**
2078  *      netdev_set_master       -       set up master/slave pair
2079  *      @slave: slave device
2080  *      @master: new master device
2081  *
2082  *      Changes the master device of the slave. Pass %NULL to break the
2083  *      bonding. The caller must hold the RTNL semaphore. On a failure
2084  *      a negative errno code is returned. On success the reference counts
2085  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2086  *      function returns zero.
2087  */
2088 int netdev_set_master(struct net_device *slave, struct net_device *master)
2089 {
2090         struct net_device *old = slave->master;
2091
2092         ASSERT_RTNL();
2093
2094         if (master) {
2095                 if (old)
2096                         return -EBUSY;
2097                 dev_hold(master);
2098         }
2099
2100         slave->master = master;
2101
2102         synchronize_net();
2103
2104         if (old)
2105                 dev_put(old);
2106
2107         if (master)
2108                 slave->flags |= IFF_SLAVE;
2109         else
2110                 slave->flags &= ~IFF_SLAVE;
2111
2112         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2113         return 0;
2114 }
2115
2116 /**
2117  *      dev_set_promiscuity     - update promiscuity count on a device
2118  *      @dev: device
2119  *      @inc: modifier
2120  *
2121  *      Add or remove promsicuity from a device. While the count in the device
2122  *      remains above zero the interface remains promiscuous. Once it hits zero
2123  *      the device reverts back to normal filtering operation. A negative inc
2124  *      value is used to drop promiscuity on the device.
2125  */
2126 void dev_set_promiscuity(struct net_device *dev, int inc)
2127 {
2128         unsigned short old_flags = dev->flags;
2129
2130         if ((dev->promiscuity += inc) == 0)
2131                 dev->flags &= ~IFF_PROMISC;
2132         else
2133                 dev->flags |= IFF_PROMISC;
2134         if (dev->flags != old_flags) {
2135                 dev_mc_upload(dev);
2136                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2137                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2138                                                                "left");
2139         }
2140 }
2141
2142 /**
2143  *      dev_set_allmulti        - update allmulti count on a device
2144  *      @dev: device
2145  *      @inc: modifier
2146  *
2147  *      Add or remove reception of all multicast frames to a device. While the
2148  *      count in the device remains above zero the interface remains listening
2149  *      to all interfaces. Once it hits zero the device reverts back to normal
2150  *      filtering operation. A negative @inc value is used to drop the counter
2151  *      when releasing a resource needing all multicasts.
2152  */
2153
2154 void dev_set_allmulti(struct net_device *dev, int inc)
2155 {
2156         unsigned short old_flags = dev->flags;
2157
2158         dev->flags |= IFF_ALLMULTI;
2159         if ((dev->allmulti += inc) == 0)
2160                 dev->flags &= ~IFF_ALLMULTI;
2161         if (dev->flags ^ old_flags)
2162                 dev_mc_upload(dev);
2163 }
2164
2165 unsigned dev_get_flags(const struct net_device *dev)
2166 {
2167         unsigned flags;
2168
2169         flags = (dev->flags & ~(IFF_PROMISC |
2170                                 IFF_ALLMULTI |
2171                                 IFF_RUNNING)) |
2172                 (dev->gflags & (IFF_PROMISC |
2173                                 IFF_ALLMULTI));
2174
2175         if (netif_running(dev) && netif_carrier_ok(dev))
2176                 flags |= IFF_RUNNING;
2177
2178         return flags;
2179 }
2180
2181 int dev_change_flags(struct net_device *dev, unsigned flags)
2182 {
2183         int ret;
2184         int old_flags = dev->flags;
2185
2186         /*
2187          *      Set the flags on our device.
2188          */
2189
2190         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2191                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2192                                IFF_AUTOMEDIA)) |
2193                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2194                                     IFF_ALLMULTI));
2195
2196         /*
2197          *      Load in the correct multicast list now the flags have changed.
2198          */
2199
2200         dev_mc_upload(dev);
2201
2202         /*
2203          *      Have we downed the interface. We handle IFF_UP ourselves
2204          *      according to user attempts to set it, rather than blindly
2205          *      setting it.
2206          */
2207
2208         ret = 0;
2209         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2210                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2211
2212                 if (!ret)
2213                         dev_mc_upload(dev);
2214         }
2215
2216         if (dev->flags & IFF_UP &&
2217             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2218                                           IFF_VOLATILE)))
2219                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2220
2221         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2222                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2223                 dev->gflags ^= IFF_PROMISC;
2224                 dev_set_promiscuity(dev, inc);
2225         }
2226
2227         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2228            is important. Some (broken) drivers set IFF_PROMISC, when
2229            IFF_ALLMULTI is requested not asking us and not reporting.
2230          */
2231         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2232                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2233                 dev->gflags ^= IFF_ALLMULTI;
2234                 dev_set_allmulti(dev, inc);
2235         }
2236
2237         if (old_flags ^ dev->flags)
2238                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2239
2240         return ret;
2241 }
2242
2243 int dev_set_mtu(struct net_device *dev, int new_mtu)
2244 {
2245         int err;
2246
2247         if (new_mtu == dev->mtu)
2248                 return 0;
2249
2250         /*      MTU must be positive.    */
2251         if (new_mtu < 0)
2252                 return -EINVAL;
2253
2254         if (!netif_device_present(dev))
2255                 return -ENODEV;
2256
2257         err = 0;
2258         if (dev->change_mtu)
2259                 err = dev->change_mtu(dev, new_mtu);
2260         else
2261                 dev->mtu = new_mtu;
2262         if (!err && dev->flags & IFF_UP)
2263                 notifier_call_chain(&netdev_chain,
2264                                     NETDEV_CHANGEMTU, dev);
2265         return err;
2266 }
2267
2268 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2269 {
2270         int err;
2271
2272         if (!dev->set_mac_address)
2273                 return -EOPNOTSUPP;
2274         if (sa->sa_family != dev->type)
2275                 return -EINVAL;
2276         if (!netif_device_present(dev))
2277                 return -ENODEV;
2278         err = dev->set_mac_address(dev, sa);
2279         if (!err)
2280                 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2281         return err;
2282 }
2283
2284 /*
2285  *      Perform the SIOCxIFxxx calls.
2286  */
2287 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2288 {
2289         int err;
2290         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2291
2292         if (!dev)
2293                 return -ENODEV;
2294
2295         switch (cmd) {
2296                 case SIOCGIFFLAGS:      /* Get interface flags */
2297                         ifr->ifr_flags = dev_get_flags(dev);
2298                         return 0;
2299
2300                 case SIOCSIFFLAGS:      /* Set interface flags */
2301                         return dev_change_flags(dev, ifr->ifr_flags);
2302
2303                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2304                                            (currently unused) */
2305                         ifr->ifr_metric = 0;
2306                         return 0;
2307
2308                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2309                                            (currently unused) */
2310                         return -EOPNOTSUPP;
2311
2312                 case SIOCGIFMTU:        /* Get the MTU of a device */
2313                         ifr->ifr_mtu = dev->mtu;
2314                         return 0;
2315
2316                 case SIOCSIFMTU:        /* Set the MTU of a device */
2317                         return dev_set_mtu(dev, ifr->ifr_mtu);
2318
2319                 case SIOCGIFHWADDR:
2320                         if (!dev->addr_len)
2321                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2322                         else
2323                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2324                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2325                         ifr->ifr_hwaddr.sa_family = dev->type;
2326                         return 0;
2327
2328                 case SIOCSIFHWADDR:
2329                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2330
2331                 case SIOCSIFHWBROADCAST:
2332                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2333                                 return -EINVAL;
2334                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2335                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2336                         notifier_call_chain(&netdev_chain,
2337                                             NETDEV_CHANGEADDR, dev);
2338                         return 0;
2339
2340                 case SIOCGIFMAP:
2341                         ifr->ifr_map.mem_start = dev->mem_start;
2342                         ifr->ifr_map.mem_end   = dev->mem_end;
2343                         ifr->ifr_map.base_addr = dev->base_addr;
2344                         ifr->ifr_map.irq       = dev->irq;
2345                         ifr->ifr_map.dma       = dev->dma;
2346                         ifr->ifr_map.port      = dev->if_port;
2347                         return 0;
2348
2349                 case SIOCSIFMAP:
2350                         if (dev->set_config) {
2351                                 if (!netif_device_present(dev))
2352                                         return -ENODEV;
2353                                 return dev->set_config(dev, &ifr->ifr_map);
2354                         }
2355                         return -EOPNOTSUPP;
2356
2357                 case SIOCADDMULTI:
2358                         if (!dev->set_multicast_list ||
2359                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2360                                 return -EINVAL;
2361                         if (!netif_device_present(dev))
2362                                 return -ENODEV;
2363                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2364                                           dev->addr_len, 1);
2365
2366                 case SIOCDELMULTI:
2367                         if (!dev->set_multicast_list ||
2368                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2369                                 return -EINVAL;
2370                         if (!netif_device_present(dev))
2371                                 return -ENODEV;
2372                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2373                                              dev->addr_len, 1);
2374
2375                 case SIOCGIFINDEX:
2376                         ifr->ifr_ifindex = dev->ifindex;
2377                         return 0;
2378
2379                 case SIOCGIFTXQLEN:
2380                         ifr->ifr_qlen = dev->tx_queue_len;
2381                         return 0;
2382
2383                 case SIOCSIFTXQLEN:
2384                         if (ifr->ifr_qlen < 0)
2385                                 return -EINVAL;
2386                         dev->tx_queue_len = ifr->ifr_qlen;
2387                         return 0;
2388
2389                 case SIOCSIFNAME:
2390                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2391                         return dev_change_name(dev, ifr->ifr_newname);
2392
2393                 /*
2394                  *      Unknown or private ioctl
2395                  */
2396
2397                 default:
2398                         if ((cmd >= SIOCDEVPRIVATE &&
2399                             cmd <= SIOCDEVPRIVATE + 15) ||
2400                             cmd == SIOCBONDENSLAVE ||
2401                             cmd == SIOCBONDRELEASE ||
2402                             cmd == SIOCBONDSETHWADDR ||
2403                             cmd == SIOCBONDSLAVEINFOQUERY ||
2404                             cmd == SIOCBONDINFOQUERY ||
2405                             cmd == SIOCBONDCHANGEACTIVE ||
2406                             cmd == SIOCGMIIPHY ||
2407                             cmd == SIOCGMIIREG ||
2408                             cmd == SIOCSMIIREG ||
2409                             cmd == SIOCBRADDIF ||
2410                             cmd == SIOCBRDELIF ||
2411                             cmd == SIOCWANDEV) {
2412                                 err = -EOPNOTSUPP;
2413                                 if (dev->do_ioctl) {
2414                                         if (netif_device_present(dev))
2415                                                 err = dev->do_ioctl(dev, ifr,
2416                                                                     cmd);
2417                                         else
2418                                                 err = -ENODEV;
2419                                 }
2420                         } else
2421                                 err = -EINVAL;
2422
2423         }
2424         return err;
2425 }
2426
2427 /*
2428  *      This function handles all "interface"-type I/O control requests. The actual
2429  *      'doing' part of this is dev_ifsioc above.
2430  */
2431
2432 /**
2433  *      dev_ioctl       -       network device ioctl
2434  *      @cmd: command to issue
2435  *      @arg: pointer to a struct ifreq in user space
2436  *
2437  *      Issue ioctl functions to devices. This is normally called by the
2438  *      user space syscall interfaces but can sometimes be useful for
2439  *      other purposes. The return value is the return from the syscall if
2440  *      positive or a negative errno code on error.
2441  */
2442
2443 int dev_ioctl(unsigned int cmd, void __user *arg)
2444 {
2445         struct ifreq ifr;
2446         int ret;
2447         char *colon;
2448
2449         /* One special case: SIOCGIFCONF takes ifconf argument
2450            and requires shared lock, because it sleeps writing
2451            to user space.
2452          */
2453
2454         if (cmd == SIOCGIFCONF) {
2455                 rtnl_shlock();
2456                 ret = dev_ifconf((char __user *) arg);
2457                 rtnl_shunlock();
2458                 return ret;
2459         }
2460         if (cmd == SIOCGIFNAME)
2461                 return dev_ifname((struct ifreq __user *)arg);
2462
2463         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2464                 return -EFAULT;
2465
2466         ifr.ifr_name[IFNAMSIZ-1] = 0;
2467
2468         colon = strchr(ifr.ifr_name, ':');
2469         if (colon)
2470                 *colon = 0;
2471
2472         /*
2473          *      See which interface the caller is talking about.
2474          */
2475
2476         switch (cmd) {
2477                 /*
2478                  *      These ioctl calls:
2479                  *      - can be done by all.
2480                  *      - atomic and do not require locking.
2481                  *      - return a value
2482                  */
2483                 case SIOCGIFFLAGS:
2484                 case SIOCGIFMETRIC:
2485                 case SIOCGIFMTU:
2486                 case SIOCGIFHWADDR:
2487                 case SIOCGIFSLAVE:
2488                 case SIOCGIFMAP:
2489                 case SIOCGIFINDEX:
2490                 case SIOCGIFTXQLEN:
2491                         dev_load(ifr.ifr_name);
2492                         read_lock(&dev_base_lock);
2493                         ret = dev_ifsioc(&ifr, cmd);
2494                         read_unlock(&dev_base_lock);
2495                         if (!ret) {
2496                                 if (colon)
2497                                         *colon = ':';
2498                                 if (copy_to_user(arg, &ifr,
2499                                                  sizeof(struct ifreq)))
2500                                         ret = -EFAULT;
2501                         }
2502                         return ret;
2503
2504                 case SIOCETHTOOL:
2505                         dev_load(ifr.ifr_name);
2506                         rtnl_lock();
2507                         ret = dev_ethtool(&ifr);
2508                         rtnl_unlock();
2509                         if (!ret) {
2510                                 if (colon)
2511                                         *colon = ':';
2512                                 if (copy_to_user(arg, &ifr,
2513                                                  sizeof(struct ifreq)))
2514                                         ret = -EFAULT;
2515                         }
2516                         return ret;
2517
2518                 /*
2519                  *      These ioctl calls:
2520                  *      - require superuser power.
2521                  *      - require strict serialization.
2522                  *      - return a value
2523                  */
2524                 case SIOCGMIIPHY:
2525                 case SIOCGMIIREG:
2526                 case SIOCSIFNAME:
2527                         if (!capable(CAP_NET_ADMIN))
2528                                 return -EPERM;
2529                         dev_load(ifr.ifr_name);
2530                         rtnl_lock();
2531                         ret = dev_ifsioc(&ifr, cmd);
2532                         rtnl_unlock();
2533                         if (!ret) {
2534                                 if (colon)
2535                                         *colon = ':';
2536                                 if (copy_to_user(arg, &ifr,
2537                                                  sizeof(struct ifreq)))
2538                                         ret = -EFAULT;
2539                         }
2540                         return ret;
2541
2542                 /*
2543                  *      These ioctl calls:
2544                  *      - require superuser power.
2545                  *      - require strict serialization.
2546                  *      - do not return a value
2547                  */
2548                 case SIOCSIFFLAGS:
2549                 case SIOCSIFMETRIC:
2550                 case SIOCSIFMTU:
2551                 case SIOCSIFMAP:
2552                 case SIOCSIFHWADDR:
2553                 case SIOCSIFSLAVE:
2554                 case SIOCADDMULTI:
2555                 case SIOCDELMULTI:
2556                 case SIOCSIFHWBROADCAST:
2557                 case SIOCSIFTXQLEN:
2558                 case SIOCSMIIREG:
2559                 case SIOCBONDENSLAVE:
2560                 case SIOCBONDRELEASE:
2561                 case SIOCBONDSETHWADDR:
2562                 case SIOCBONDCHANGEACTIVE:
2563                 case SIOCBRADDIF:
2564                 case SIOCBRDELIF:
2565                         if (!capable(CAP_NET_ADMIN))
2566                                 return -EPERM;
2567                         /* fall through */
2568                 case SIOCBONDSLAVEINFOQUERY:
2569                 case SIOCBONDINFOQUERY:
2570                         dev_load(ifr.ifr_name);
2571                         rtnl_lock();
2572                         ret = dev_ifsioc(&ifr, cmd);
2573                         rtnl_unlock();
2574                         return ret;
2575
2576                 case SIOCGIFMEM:
2577                         /* Get the per device memory space. We can add this but
2578                          * currently do not support it */
2579                 case SIOCSIFMEM:
2580                         /* Set the per device memory buffer space.
2581                          * Not applicable in our case */
2582                 case SIOCSIFLINK:
2583                         return -EINVAL;
2584
2585                 /*
2586                  *      Unknown or private ioctl.
2587                  */
2588                 default:
2589                         if (cmd == SIOCWANDEV ||
2590                             (cmd >= SIOCDEVPRIVATE &&
2591                              cmd <= SIOCDEVPRIVATE + 15)) {
2592                                 dev_load(ifr.ifr_name);
2593                                 rtnl_lock();
2594                                 ret = dev_ifsioc(&ifr, cmd);
2595                                 rtnl_unlock();
2596                                 if (!ret && copy_to_user(arg, &ifr,
2597                                                          sizeof(struct ifreq)))
2598                                         ret = -EFAULT;
2599                                 return ret;
2600                         }
2601 #ifdef WIRELESS_EXT
2602                         /* Take care of Wireless Extensions */
2603                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2604                                 /* If command is `set a parameter', or
2605                                  * `get the encoding parameters', check if
2606                                  * the user has the right to do it */
2607                                 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2608                                         if (!capable(CAP_NET_ADMIN))
2609                                                 return -EPERM;
2610                                 }
2611                                 dev_load(ifr.ifr_name);
2612                                 rtnl_lock();
2613                                 /* Follow me in net/core/wireless.c */
2614                                 ret = wireless_process_ioctl(&ifr, cmd);
2615                                 rtnl_unlock();
2616                                 if (IW_IS_GET(cmd) &&
2617                                     copy_to_user(arg, &ifr,
2618                                                  sizeof(struct ifreq)))
2619                                         ret = -EFAULT;
2620                                 return ret;
2621                         }
2622 #endif  /* WIRELESS_EXT */
2623                         return -EINVAL;
2624         }
2625 }
2626
2627
/**
 *      dev_new_index   -       allocate an ifindex
 *
 *      Returns a suitable unique value for a new device interface
 *      number.  The caller must hold the rtnl semaphore or the
 *      dev_base_lock to be sure it remains unique.
 *
 *      Candidates come from a static counter that is advanced on every
 *      attempt and restarts at 1 after INT_MAX; a candidate is returned
 *      as soon as __dev_get_by_index() reports no device using it.
 */
static int dev_new_index(void)
{
        static int ifindex;

        for (;;) {
                /*
                 * Wrap by hand: incrementing past INT_MAX is signed
                 * overflow, which is undefined behaviour and may be
                 * optimised in surprising ways.
                 */
                if (ifindex >= INT_MAX)
                        ifindex = 1;
                else
                        ifindex++;

                if (!__dev_get_by_index(ifindex))
                        return ifindex;
        }
}
2645
2646 static int dev_boot_phase = 1;
2647
2648 /* Delayed registration/unregisteration */
2649 static DEFINE_SPINLOCK(net_todo_list_lock);
2650 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2651
2652 static inline void net_set_todo(struct net_device *dev)
2653 {
2654         spin_lock(&net_todo_list_lock);
2655         list_add_tail(&dev->todo_list, &net_todo_list);
2656         spin_unlock(&net_todo_list_lock);
2657 }
2658
2659 /**
2660  *      register_netdevice      - register a network device
2661  *      @dev: device to register
2662  *
2663  *      Take a completed network device structure and add it to the kernel
2664  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2665  *      chain. 0 is returned on success. A negative errno code is returned
2666  *      on a failure to set up the device, or if the name is a duplicate.
2667  *
2668  *      Callers must hold the rtnl semaphore. You may want
2669  *      register_netdev() instead of this.
2670  *
2671  *      BUGS:
2672  *      The locking appears insufficient to guarantee two parallel registers
2673  *      will not get the same name.
2674  */
2675
2676 int register_netdevice(struct net_device *dev)
2677 {
2678         struct hlist_head *head;
2679         struct hlist_node *p;
2680         int ret;
2681
2682         BUG_ON(dev_boot_phase);
2683         ASSERT_RTNL();
2684
2685         /* When net_device's are persistent, this will be fatal. */
2686         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2687
2688         spin_lock_init(&dev->queue_lock);
2689         spin_lock_init(&dev->xmit_lock);
2690         dev->xmit_lock_owner = -1;
2691 #ifdef CONFIG_NET_CLS_ACT
2692         spin_lock_init(&dev->ingress_lock);
2693 #endif
2694
2695         ret = alloc_divert_blk(dev);
2696         if (ret)
2697                 goto out;
2698
2699         dev->iflink = -1;
2700
2701         /* Init, if this function is available */
2702         if (dev->init) {
2703                 ret = dev->init(dev);
2704                 if (ret) {
2705                         if (ret > 0)
2706                                 ret = -EIO;
2707                         goto out_err;
2708                 }
2709         }
2710
2711         if (!dev_valid_name(dev->name)) {
2712                 ret = -EINVAL;
2713                 goto out_err;
2714         }
2715
2716         dev->ifindex = dev_new_index();
2717         if (dev->iflink == -1)
2718                 dev->iflink = dev->ifindex;
2719
2720         /* Check for existence of name */
2721         head = dev_name_hash(dev->name);
2722         hlist_for_each(p, head) {
2723                 struct net_device *d
2724                         = hlist_entry(p, struct net_device, name_hlist);
2725                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2726                         ret = -EEXIST;
2727                         goto out_err;
2728                 }
2729         }
2730
2731         /* Fix illegal SG+CSUM combinations. */
2732         if ((dev->features & NETIF_F_SG) &&
2733             !(dev->features & (NETIF_F_IP_CSUM |
2734                                NETIF_F_NO_CSUM |
2735                                NETIF_F_HW_CSUM))) {
2736                 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2737                        dev->name);
2738                 dev->features &= ~NETIF_F_SG;
2739         }
2740
2741         /* TSO requires that SG is present as well. */
2742         if ((dev->features & NETIF_F_TSO) &&
2743             !(dev->features & NETIF_F_SG)) {
2744                 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2745                        dev->name);
2746                 dev->features &= ~NETIF_F_TSO;
2747         }
2748         if (dev->features & NETIF_F_UFO) {
2749                 if (!(dev->features & NETIF_F_HW_CSUM)) {
2750                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2751                                         "NETIF_F_HW_CSUM feature.\n",
2752                                                         dev->name);
2753                         dev->features &= ~NETIF_F_UFO;
2754                 }
2755                 if (!(dev->features & NETIF_F_SG)) {
2756                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2757                                         "NETIF_F_SG feature.\n",
2758                                         dev->name);
2759                         dev->features &= ~NETIF_F_UFO;
2760                 }
2761         }
2762
2763         /*
2764          *      nil rebuild_header routine,
2765          *      that should be never called and used as just bug trap.
2766          */
2767
2768         if (!dev->rebuild_header)
2769                 dev->rebuild_header = default_rebuild_header;
2770
2771         /*
2772          *      Default initial state at registry is that the
2773          *      device is present.
2774          */
2775
2776         set_bit(__LINK_STATE_PRESENT, &dev->state);
2777
2778         dev->next = NULL;
2779         dev_init_scheduler(dev);
2780         write_lock_bh(&dev_base_lock);
2781         *dev_tail = dev;
2782         dev_tail = &dev->next;
2783         hlist_add_head(&dev->name_hlist, head);
2784         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2785         dev_hold(dev);
2786         dev->reg_state = NETREG_REGISTERING;
2787         write_unlock_bh(&dev_base_lock);
2788
2789         /* Notify protocols, that a new device appeared. */
2790         notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2791
2792         /* Finish registration after unlock */
2793         net_set_todo(dev);
2794         ret = 0;
2795
2796 out:
2797         return ret;
2798 out_err:
2799         free_divert_blk(dev);
2800         goto out;
2801 }
2802
2803 /**
2804  *      register_netdev - register a network device
2805  *      @dev: device to register
2806  *
2807  *      Take a completed network device structure and add it to the kernel
2808  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2809  *      chain. 0 is returned on success. A negative errno code is returned
2810  *      on a failure to set up the device, or if the name is a duplicate.
2811  *
2812  *      This is a wrapper around register_netdev that takes the rtnl semaphore
2813  *      and expands the device name if you passed a format string to
2814  *      alloc_netdev.
2815  */
2816 int register_netdev(struct net_device *dev)
2817 {
2818         int err;
2819
2820         rtnl_lock();
2821
2822         /*
2823          * If the name is a format string the caller wants us to do a
2824          * name allocation.
2825          */
2826         if (strchr(dev->name, '%')) {
2827                 err = dev_alloc_name(dev, dev->name);
2828                 if (err < 0)
2829                         goto out;
2830         }
2831
2832         /*
2833          * Back compatibility hook. Kill this one in 2.5
2834          */
2835         if (dev->name[0] == 0 || dev->name[0] == ' ') {
2836                 err = dev_alloc_name(dev, "eth%d");
2837                 if (err < 0)
2838                         goto out;
2839         }
2840
2841         err = register_netdevice(dev);
2842 out:
2843         rtnl_unlock();
2844         return err;
2845 }
2846 EXPORT_SYMBOL(register_netdev);
2847
2848 /*
2849  * netdev_wait_allrefs - wait until all references are gone.
2850  *
2851  * This is called when unregistering network devices.
2852  *
2853  * Any protocol or device that holds a reference should register
2854  * for netdevice notification, and cleanup and put back the
2855  * reference if they receive an UNREGISTER event.
2856  * We can get stuck here if buggy protocols don't correctly
2857  * call dev_put.
2858  */
2859 static void netdev_wait_allrefs(struct net_device *dev)
2860 {
2861         unsigned long rebroadcast_time, warning_time;
2862
2863         rebroadcast_time = warning_time = jiffies;
2864         while (atomic_read(&dev->refcnt) != 0) {
2865                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2866                         rtnl_shlock();
2867
2868                         /* Rebroadcast unregister notification */
2869                         notifier_call_chain(&netdev_chain,
2870                                             NETDEV_UNREGISTER, dev);
2871
2872                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2873                                      &dev->state)) {
2874                                 /* We must not have linkwatch events
2875                                  * pending on unregister. If this
2876                                  * happens, we simply run the queue
2877                                  * unscheduled, resulting in a noop
2878                                  * for this device.
2879                                  */
2880                                 linkwatch_run_queue();
2881                         }
2882
2883                         rtnl_shunlock();
2884
2885                         rebroadcast_time = jiffies;
2886                 }
2887
2888                 msleep(250);
2889
2890                 if (time_after(jiffies, warning_time + 10 * HZ)) {
2891                         printk(KERN_EMERG "unregister_netdevice: "
2892                                "waiting for %s to become free. Usage "
2893                                "count = %d\n",
2894                                dev->name, atomic_read(&dev->refcnt));
2895                         warning_time = jiffies;
2896                 }
2897         }
2898 }
2899
2900 /* The sequence is:
2901  *
2902  *      rtnl_lock();
2903  *      ...
2904  *      register_netdevice(x1);
2905  *      register_netdevice(x2);
2906  *      ...
2907  *      unregister_netdevice(y1);
2908  *      unregister_netdevice(y2);
2909  *      ...
2910  *      rtnl_unlock();
2911  *      free_netdev(y1);
2912  *      free_netdev(y2);
2913  *
2914  * We are invoked by rtnl_unlock() after it drops the semaphore.
2915  * This allows us to deal with problems:
2916  * 1) We can create/delete sysfs objects which invoke hotplug
2917  *    without deadlocking with linkwatch via keventd.
2918  * 2) Since we run with the RTNL semaphore not held, we can sleep
2919  *    safely in order to wait for the netdev refcnt to drop to zero.
2920  */
2921 static DECLARE_MUTEX(net_todo_run_mutex);
2922 void netdev_run_todo(void)
2923 {
2924         struct list_head list = LIST_HEAD_INIT(list);
2925         int err;
2926
2927
2928         /* Need to guard against multiple cpu's getting out of order. */
2929         down(&net_todo_run_mutex);
2930
2931         /* Not safe to do outside the semaphore.  We must not return
2932          * until all unregister events invoked by the local processor
2933          * have been completed (either by this todo run, or one on
2934          * another cpu).
2935          */
2936         if (list_empty(&net_todo_list))
2937                 goto out;
2938
2939         /* Snapshot list, allow later requests */
2940         spin_lock(&net_todo_list_lock);
2941         list_splice_init(&net_todo_list, &list);
2942         spin_unlock(&net_todo_list_lock);
2943
2944         while (!list_empty(&list)) {
2945                 struct net_device *dev
2946                         = list_entry(list.next, struct net_device, todo_list);
2947                 list_del(&dev->todo_list);
2948
2949                 switch(dev->reg_state) {
2950                 case NETREG_REGISTERING:
2951                         dev->reg_state = NETREG_REGISTERED;
2952                         err = netdev_register_sysfs(dev);
2953                         if (err)
2954                                 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2955                                        dev->name, err);
2956                         break;
2957
2958                 case NETREG_UNREGISTERING:
2959                         netdev_unregister_sysfs(dev);
2960                         dev->reg_state = NETREG_UNREGISTERED;
2961
2962                         netdev_wait_allrefs(dev);
2963
2964                         /* paranoia */
2965                         BUG_ON(atomic_read(&dev->refcnt));
2966                         BUG_TRAP(!dev->ip_ptr);
2967                         BUG_TRAP(!dev->ip6_ptr);
2968                         BUG_TRAP(!dev->dn_ptr);
2969
2970
2971                         /* It must be the very last action,
2972                          * after this 'dev' may point to freed up memory.
2973                          */
2974                         if (dev->destructor)
2975                                 dev->destructor(dev);
2976                         break;
2977
2978                 default:
2979                         printk(KERN_ERR "network todo '%s' but state %d\n",
2980                                dev->name, dev->reg_state);
2981                         break;
2982                 }
2983         }
2984
2985 out:
2986         up(&net_todo_run_mutex);
2987 }
2988
2989 /**
2990  *      alloc_netdev - allocate network device
2991  *      @sizeof_priv:   size of private data to allocate space for
2992  *      @name:          device name format string
2993  *      @setup:         callback to initialize device
2994  *
2995  *      Allocates a struct net_device with private data area for driver use
2996  *      and performs basic initialization.
2997  */
2998 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
2999                 void (*setup)(struct net_device *))
3000 {
3001         void *p;
3002         struct net_device *dev;
3003         int alloc_size;
3004
3005         /* ensure 32-byte alignment of both the device and private area */
3006         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3007         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3008
3009         p = kmalloc(alloc_size, GFP_KERNEL);
3010         if (!p) {
3011                 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3012                 return NULL;
3013         }
3014         memset(p, 0, alloc_size);
3015
3016         dev = (struct net_device *)
3017                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3018         dev->padded = (char *)dev - (char *)p;
3019
3020         if (sizeof_priv)
3021                 dev->priv = netdev_priv(dev);
3022
3023         setup(dev);
3024         strcpy(dev->name, name);
3025         return dev;
3026 }
3027 EXPORT_SYMBOL(alloc_netdev);
3028
3029 /**
3030  *      free_netdev - free network device
3031  *      @dev: device
3032  *
3033  *      This function does the last stage of destroying an allocated device
3034  *      interface. The reference to the device object is released.
3035  *      If this is the last reference then it will be freed.
3036  */
3037 void free_netdev(struct net_device *dev)
3038 {
3039 #ifdef CONFIG_SYSFS
3040         /*  Compatiablity with error handling in drivers */
3041         if (dev->reg_state == NETREG_UNINITIALIZED) {
3042                 kfree((char *)dev - dev->padded);
3043                 return;
3044         }
3045
3046         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3047         dev->reg_state = NETREG_RELEASED;
3048
3049         /* will free via class release */
3050         class_device_put(&dev->class_dev);
3051 #else
3052         kfree((char *)dev - dev->padded);
3053 #endif
3054 }
3055
/**
 *      synchronize_net - synchronize with packet receive processing
 *
 *      Flags the call site as one that may sleep, then waits in
 *      synchronize_rcu().
 */
void synchronize_net(void)
{
        might_sleep();
        synchronize_rcu();
}
3062
3063 /**
3064  *      unregister_netdevice - remove device from the kernel
3065  *      @dev: device
3066  *
3067  *      This function shuts down a device interface and removes it
3068  *      from the kernel tables. On success 0 is returned, on a failure
3069  *      a negative errno code is returned.
3070  *
3071  *      Callers must hold the rtnl semaphore.  You may want
3072  *      unregister_netdev() instead of this.
3073  */
3074
3075 int unregister_netdevice(struct net_device *dev)
3076 {
3077         struct net_device *d, **dp;
3078
3079         BUG_ON(dev_boot_phase);
3080         ASSERT_RTNL();
3081
3082         /* Some devices call without registering for initialization unwind. */
3083         if (dev->reg_state == NETREG_UNINITIALIZED) {
3084                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3085                                   "was registered\n", dev->name, dev);
3086                 return -ENODEV;
3087         }
3088
3089         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3090
3091         /* If device is running, close it first. */
3092         if (dev->flags & IFF_UP)
3093                 dev_close(dev);
3094
3095         /* And unlink it from device chain. */
3096         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3097                 if (d == dev) {
3098                         write_lock_bh(&dev_base_lock);
3099                         hlist_del(&dev->name_hlist);
3100                         hlist_del(&dev->index_hlist);
3101                         if (dev_tail == &dev->next)
3102                                 dev_tail = dp;
3103                         *dp = d->next;
3104                         write_unlock_bh(&dev_base_lock);
3105                         break;
3106                 }
3107         }
3108         if (!d) {
3109                 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3110                        dev->name);
3111                 return -ENODEV;
3112         }
3113
3114         dev->reg_state = NETREG_UNREGISTERING;
3115
3116         synchronize_net();
3117
3118         /* Shutdown queueing discipline. */
3119         dev_shutdown(dev);
3120
3121
3122         /* Notify protocols, that we are about to destroy
3123            this device. They should clean all the things.
3124         */
3125         notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3126
3127         /*
3128          *      Flush the multicast chain
3129          */
3130         dev_mc_discard(dev);
3131
3132         if (dev->uninit)
3133                 dev->uninit(dev);
3134
3135         /* Notifier chain MUST detach us from master device. */
3136         BUG_TRAP(!dev->master);
3137
3138         free_divert_blk(dev);
3139
3140         /* Finish processing unregister after unlock */
3141         net_set_todo(dev);
3142
3143         synchronize_net();
3144
3145         dev_put(dev);
3146         return 0;
3147 }
3148
/**
 *      unregister_netdev - remove device from the kernel
 *      @dev: device
 *
 *      This function shuts down a device interface and removes it
 *      from the kernel tables.  Nothing is returned: the result of
 *      unregister_netdevice() is discarded.
 *
 *      This is just a wrapper for unregister_netdevice that takes
 *      the rtnl semaphore.  In general you want to use this and not
 *      unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
        rtnl_lock();
        unregister_netdevice(dev);
        rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3169
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU notifier callback.  On CPU_DEAD the softnet work of the dead cpu
 * (passed in @ocpu) is moved to the cpu running this callback: its
 * completion and output queues are appended to the local ones, and its
 * queued input packets are fed back through netif_rx().  Every other
 * action is ignored.  Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
                            unsigned long action,
                            void *ocpu)
{
        unsigned int dead_cpu = (unsigned long)ocpu;
        unsigned int this_cpu;
        struct softnet_data *local_sd, *dead_sd;
        struct sk_buff **skb_link;
        struct net_device **sched_link;
        struct sk_buff *skb;

        if (action != CPU_DEAD)
                return NOTIFY_OK;

        local_irq_disable();
        this_cpu = smp_processor_id();
        local_sd = &per_cpu(softnet_data, this_cpu);
        dead_sd = &per_cpu(softnet_data, dead_cpu);

        /* Walk to the end of our completion_queue ... */
        for (skb_link = &local_sd->completion_queue; *skb_link;
             skb_link = &(*skb_link)->next)
                ;
        /* ... and append the completion queue from the offline CPU. */
        *skb_link = dead_sd->completion_queue;
        dead_sd->completion_queue = NULL;

        /* Walk to the end of our output_queue ... */
        for (sched_link = &local_sd->output_queue; *sched_link;
             sched_link = &(*sched_link)->next_sched)
                ;
        /* ... and append the output queue from the offline CPU. */
        *sched_link = dead_sd->output_queue;
        dead_sd->output_queue = NULL;

        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_enable();

        /* Process offline CPU's input_pkt_queue */
        for (;;) {
                skb = __skb_dequeue(&dead_sd->input_pkt_queue);
                if (!skb)
                        break;
                netif_rx(skb);
        }

        return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3215
3216
3217 /*
3218  *      Initialize the DEV module. At boot time this walks the device list and
3219  *      unhooks any devices that fail to initialise (normally hardware not
3220  *      present) and leaves us with a valid list of present and active devices.
3221  *
3222  */
3223
3224 /*
3225  *       This is called single threaded during boot, so no need
3226  *       to take the rtnl semaphore.
3227  */
3228 static int __init net_dev_init(void)
3229 {
3230         int i, rc = -ENOMEM;
3231
3232         BUG_ON(!dev_boot_phase);
3233
3234         net_random_init();
3235
3236         if (dev_proc_init())
3237                 goto out;
3238
3239         if (netdev_sysfs_init())
3240                 goto out;
3241
3242         INIT_LIST_HEAD(&ptype_all);
3243         for (i = 0; i < 16; i++)
3244                 INIT_LIST_HEAD(&ptype_base[i]);
3245
3246         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3247                 INIT_HLIST_HEAD(&dev_name_head[i]);
3248
3249         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3250                 INIT_HLIST_HEAD(&dev_index_head[i]);
3251
3252         /*
3253          *      Initialise the packet receive queues.
3254          */
3255
3256         for_each_cpu(i) {
3257                 struct softnet_data *queue;
3258
3259                 queue = &per_cpu(softnet_data, i);
3260                 skb_queue_head_init(&queue->input_pkt_queue);
3261                 queue->completion_queue = NULL;
3262                 INIT_LIST_HEAD(&queue->poll_list);
3263                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3264                 queue->backlog_dev.weight = weight_p;
3265                 queue->backlog_dev.poll = process_backlog;
3266                 atomic_set(&queue->backlog_dev.refcnt, 1);
3267         }
3268
3269         dev_boot_phase = 0;
3270
3271         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3272         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3273
3274         hotcpu_notifier(dev_cpu_callback, 0);
3275         dst_init();
3276         dev_mcast_init();
3277         rc = 0;
3278 out:
3279         return rc;
3280 }
3281
3282 subsys_initcall(net_dev_init);
3283
/* Symbols exported unconditionally, in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Symbols exported only with the matching configuration option. */
#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
EXPORT_SYMBOL(dev_queue_xmit_nit);
#endif

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);