net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro, <bir7@leland.Stanford.Edu>
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <asm/bitops.h>
  78 #include <linux/config.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/string.h>
  84 #include <linux/mm.h>
  85 #include <linux/socket.h>
  86 #include <linux/sockios.h>
  87 #include <linux/errno.h>
  88 #include <linux/interrupt.h>
  89 #include <linux/if_ether.h>
  90 #include <linux/netdevice.h>
  91 #include <linux/etherdevice.h>
  92 #include <linux/notifier.h>
  93 #include <linux/skbuff.h>
  94 #include <net/sock.h>
  95 #include <linux/rtnetlink.h>
  96 #include <linux/proc_fs.h>
  97 #include <linux/seq_file.h>
  98 #include <linux/stat.h>
  99 #include <linux/if_bridge.h>
 100 #include <linux/divert.h>
 101 #include <net/dst.h>
 102 #include <net/pkt_sched.h>
 103 #include <net/checksum.h>
 104 #include <linux/highmem.h>
 105 #include <linux/init.h>
 106 #include <linux/kmod.h>
 107 #include <linux/module.h>
 108 #include <linux/kallsyms.h>
 109 #include <linux/netpoll.h>
 110 #include <linux/rcupdate.h>
 111 #ifdef CONFIG_NET_RADIO
 112 #include <linux/wireless.h>             /* Note : will define WIRELESS_EXT */
 113 #include <net/iw_handler.h>
 114 #endif  /* CONFIG_NET_RADIO */
 115 #include <asm/current.h>
 116
 117 /* This define, if set, will randomly drop a packet when congestion
 118  * is more than moderate.  It helps fairness in the multi-interface
 119  * case when one of them is a hog, but it kills performance for the
 120  * single interface case so it is off now by default.
 121  */
 122 #undef RAND_LIE
 123
 124 /* Setting this will sample the queue lengths and thus congestion
 125  * via a timer instead of as each packet is received.
 126  */
 127 #undef OFFLINE_SAMPLE
 128
 129 /*
 130  *      The list of packet types we will receive (as opposed to discard)
 131  *      and the routines to invoke.
 132  *
 133  *      Why 16. Because with 16 the only overlap we get on a hash of the
 134  *      low nibble of the protocol value is RARP/SNAP/X.25.
 135  *
 136  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 137  *             sure which should go first, but I bet it won't make much
 138  *             difference if we are running VLANs.  The good news is that
 139  *             this protocol won't be in the list unless compiled in, so
 140  *             the average user (w/out VLANs) will not be adversly affected.
 141  *             --BLG
 142  *
 143  *              0800    IP
 144  *              8100    802.1Q VLAN
 145  *              0001    802.3
 146  *              0002    AX.25
 147  *              0004    802.2
 148  *              8035    RARP
 149  *              0005    SNAP
 150  *              0805    X.25
 151  *              0806    ARP
 152  *              8137    IPX
 153  *              0009    Localtalk
 154  *              86DD    IPv6
 155  */
 156
 157 static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
 158 static struct list_head ptype_base[16]; /* 16 way hashed list */
 159 static struct list_head ptype_all;              /* Taps */
 160
 161 #ifdef OFFLINE_SAMPLE
 162 static void sample_queue(unsigned long dummy);
 163 static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
 164 #endif
 165
 166 /*
 167  * The @dev_base list is protected by @dev_base_lock and the rtln
 168  * semaphore.
 169  *
 170  * Pure readers hold dev_base_lock for reading.
 171  *
 172  * Writers must hold the rtnl semaphore while they loop through the
 173  * dev_base list, and hold dev_base_lock for writing when they do the
 174  * actual updates.  This allows pure readers to access the list even
 175  * while a writer is preparing to update it.
 176  *
 177  * To put it another way, dev_base_lock is held for writing only to
 178  * protect against pure readers; the rtnl semaphore provides the
 179  * protection against other writers.
 180  *
 181  * See, for example usages, register_netdevice() and
 182  * unregister_netdevice(), which must be called with the rtnl
 183  * semaphore held.
 184  */
 185 struct net_device *dev_base;
 186 struct net_device **dev_tail = &dev_base;
 187 rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
 188
 189 EXPORT_SYMBOL(dev_base);
 190 EXPORT_SYMBOL(dev_base_lock);
 191
 192 #define NETDEV_HASHBITS 8
 193 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 194 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 195
 196 static inline struct hlist_head *dev_name_hash(const char *name)
 197 {
 198         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 199         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 200 }
 201
 202 static inline struct hlist_head *dev_index_hash(int ifindex)
 203 {
 204         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 205 }
 206
 207 /*
 208  *      Our notifier list
 209  */
 210
 211 static struct notifier_block *netdev_chain;
 212
 213 /*
 214  *      Device drivers call our routines to queue packets here. We empty the
 215  *      queue in the local softnet handler.
 216  */
 217 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
 218
 219 #ifdef CONFIG_SYSFS
 220 extern int netdev_sysfs_init(void);
 221 extern int netdev_register_sysfs(struct net_device *);
 222 extern void netdev_unregister_sysfs(struct net_device *);
 223 #else
 224 #define netdev_sysfs_init()             (0)
 225 #define netdev_register_sysfs(dev)      (0)
 226 #define netdev_unregister_sysfs(dev)    do { } while(0)
 227 #endif
 228
 229 /* netdump function */
 230 void (*netdump_func) (struct pt_regs *regs) = NULL;
 231
 232 /*******************************************************************************
 233
 234                 Protocol management and registration routines
 235
 236 *******************************************************************************/
 237
 238 /*
 239  *      For efficiency
 240  */
 241
 242 int netdev_nit;
 243
 244 /*
 245  *      Add a protocol ID to the list. Now that the input handler is
 246  *      smarter we can dispense with all the messy stuff that used to be
 247  *      here.
 248  *
 249  *      BEWARE!!! Protocol handlers, mangling input packets,
 250  *      MUST BE last in hash buckets and checking protocol handlers
 251  *      MUST start from promiscuous ptype_all chain in net_bh.
 252  *      It is true now, do not change it.
 253  *      Explanation follows: if protocol handler, mangling packet, will
 254  *      be the first on list, it is not able to sense, that packet
 255  *      is cloned and should be copied-on-write, so that it will
 256  *      change it and subsequent readers will get broken packet.
 257  *                                                      --ANK (980803)
 258  */
 259
 260 /**
 261  *      dev_add_pack - add packet handler
 262  *      @pt: packet type declaration
 263  *
 264  *      Add a protocol handler to the networking stack. The passed &packet_type
 265  *      is linked into kernel lists and may not be freed until it has been
 266  *      removed from the kernel lists.
 267  *
 268  *      This call does not sleep therefore it can not
 269  *      guarantee all CPU's that are in middle of receiving packets
 270  *      will see the new packet type (until the next received packet).
 271  */
 272
 273 void dev_add_pack(struct packet_type *pt)
 274 {
 275         int hash;
 276
 277         spin_lock_bh(&ptype_lock);
 278         if (pt->type == htons(ETH_P_ALL)) {
 279                 netdev_nit++;
 280                 list_add_rcu(&pt->list, &ptype_all);
 281         } else {
 282                 hash = ntohs(pt->type) & 15;
 283                 list_add_rcu(&pt->list, &ptype_base[hash]);
 284         }
 285         spin_unlock_bh(&ptype_lock);
 286 }
 287
 288 extern void linkwatch_run_queue(void);
 289
 290
 291
 292 /**
 293  *      __dev_remove_pack        - remove packet handler
 294  *      @pt: packet type declaration
 295  *
 296  *      Remove a protocol handler that was previously added to the kernel
 297  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 298  *      from the kernel lists and can be freed or reused once this function
 299  *      returns.
 300  *
 301  *      The packet type might still be in use by receivers
 302  *      and must not be freed until after all the CPU's have gone
 303  *      through a quiescent state.
 304  */
 305 void __dev_remove_pack(struct packet_type *pt)
 306 {
 307         struct list_head *head;
 308         struct packet_type *pt1;
 309
 310         spin_lock_bh(&ptype_lock);
 311
 312         if (pt->type == htons(ETH_P_ALL)) {
 313                 netdev_nit--;
 314                 head = &ptype_all;
 315         } else
 316                 head = &ptype_base[ntohs(pt->type) & 15];
 317
 318         list_for_each_entry(pt1, head, list) {
 319                 if (pt == pt1) {
 320                         list_del_rcu(&pt->list);
 321                         goto out;
 322                 }
 323         }
 324
 325         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 326 out:
 327         spin_unlock_bh(&ptype_lock);
 328 }
 329 /**
 330  *      dev_remove_pack  - remove packet handler
 331  *      @pt: packet type declaration
 332  *
 333  *      Remove a protocol handler that was previously added to the kernel
 334  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 335  *      from the kernel lists and can be freed or reused once this function
 336  *      returns.
 337  *
 338  *      This call sleeps to guarantee that no CPU is looking at the packet
 339  *      type after return.
 340  */
 341 void dev_remove_pack(struct packet_type *pt)
 342 {
 343         __dev_remove_pack(pt);
 344
 345         synchronize_net();
 346 }
 347
 348 /******************************************************************************
 349
 350                       Device Boot-time Settings Routines
 351
 352 *******************************************************************************/
 353
 354 /* Boot time configuration table */
 355 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 356
 357 /**
 358  *      netdev_boot_setup_add   - add new setup entry
 359  *      @name: name of the device
 360  *      @map: configured settings for the device
 361  *
 362  *      Adds new setup entry to the dev_boot_setup list.  The function
 363  *      returns 0 on error and 1 on success.  This is a generic routine to
 364  *      all netdevices.
 365  */
 366 int netdev_boot_setup_add(char *name, struct ifmap *map)
 367 {
 368         struct netdev_boot_setup *s;
 369         int i;
 370
 371         s = dev_boot_setup;
 372         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 373                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 374                         memset(s[i].name, 0, sizeof(s[i].name));
 375                         strcpy(s[i].name, name);
 376                         memcpy(&s[i].map, map, sizeof(s[i].map));
 377                         break;
 378                 }
 379         }
 380
 381         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 382 }
 383
 384 /**
 385  *      netdev_boot_setup_check - check boot time settings
 386  *      @dev: the netdevice
 387  *
 388  *      Check boot time settings for the device.
 389  *      The found settings are set for the device to be used
 390  *      later in the device probing.
 391  *      Returns 0 if no settings found, 1 if they are.
 392  */
 393 int netdev_boot_setup_check(struct net_device *dev)
 394 {
 395         struct netdev_boot_setup *s = dev_boot_setup;
 396         int i;
 397
 398         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 399                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 400                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 401                         dev->irq        = s[i].map.irq;
 402                         dev->base_addr  = s[i].map.base_addr;
 403                         dev->mem_start  = s[i].map.mem_start;
 404                         dev->mem_end    = s[i].map.mem_end;
 405                         return 1;
 406                 }
 407         }
 408         return 0;
 409 }
 410
 411
 412 /**
 413  *      netdev_boot_base        - get address from boot time settings
 414  *      @prefix: prefix for network device
 415  *      @unit: id for network device
 416  *
 417  *      Check boot time settings for the base address of device.
 418  *      The found settings are set for the device to be used
 419  *      later in the device probing.
 420  *      Returns 0 if no settings found.
 421  */
 422 unsigned long netdev_boot_base(const char *prefix, int unit)
 423 {
 424         const struct netdev_boot_setup *s = dev_boot_setup;
 425         char name[IFNAMSIZ];
 426         int i;
 427
 428         sprintf(name, "%s%d", prefix, unit);
 429
 430         /*
 431          * If device already registered then return base of 1
 432          * to indicate not to probe for this interface
 433          */
 434         if (__dev_get_by_name(name))
 435                 return 1;
 436
 437         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 438                 if (!strcmp(name, s[i].name))
 439                         return s[i].map.base_addr;
 440         return 0;
 441 }
 442
 443 /*
 444  * Saves at boot time configured settings for any netdevice.
 445  */
 446 int __init netdev_boot_setup(char *str)
 447 {
 448         int ints[5];
 449         struct ifmap map;
 450
 451         str = get_options(str, ARRAY_SIZE(ints), ints);
 452         if (!str || !*str)
 453                 return 0;
 454
 455         /* Save settings */
 456         memset(&map, 0, sizeof(map));
 457         if (ints[0] > 0)
 458                 map.irq = ints[1];
 459         if (ints[0] > 1)
 460                 map.base_addr = ints[2];
 461         if (ints[0] > 2)
 462                 map.mem_start = ints[3];
 463         if (ints[0] > 3)
 464                 map.mem_end = ints[4];
 465
 466         /* Add new entry to the list */
 467         return netdev_boot_setup_add(str, &map);
 468 }
 469
 470 __setup("netdev=", netdev_boot_setup);
 471
 472 /*******************************************************************************
 473
 474                             Device Interface Subroutines
 475
 476 *******************************************************************************/
 477
 478 /**
 479  *      __dev_get_by_name       - find a device by its name
 480  *      @name: name to find
 481  *
 482  *      Find an interface by name. Must be called under RTNL semaphore
 483  *      or @dev_base_lock. If the name is found a pointer to the device
 484  *      is returned. If the name is not found then %NULL is returned. The
 485  *      reference counters are not incremented so the caller must be
 486  *      careful with locks.
 487  */
 488
 489 struct net_device *__dev_get_by_name(const char *name)
 490 {
 491         struct hlist_node *p;
 492
 493         hlist_for_each(p, dev_name_hash(name)) {
 494                 struct net_device *dev
 495                         = hlist_entry(p, struct net_device, name_hlist);
 496                 if (!strncmp(dev->name, name, IFNAMSIZ))
 497                         return dev;
 498         }
 499         return NULL;
 500 }
 501
 502 /**
 503  *      dev_get_by_name         - find a device by its name
 504  *      @name: name to find
 505  *
 506  *      Find an interface by name. This can be called from any
 507  *      context and does its own locking. The returned handle has
 508  *      the usage count incremented and the caller must use dev_put() to
 509  *      release it when it is no longer needed. %NULL is returned if no
 510  *      matching device is found.
 511  */
 512
 513 struct net_device *dev_get_by_name(const char *name)
 514 {
 515         struct net_device *dev;
 516
 517         read_lock(&dev_base_lock);
 518         dev = __dev_get_by_name(name);
 519         if (dev)
 520                 dev_hold(dev);
 521         read_unlock(&dev_base_lock);
 522         return dev;
 523 }
 524
 525 /*
 526    Return value is changed to int to prevent illegal usage in future.
 527    It is still legal to use to check for device existence.
 528
 529    User should understand, that the result returned by this function
 530    is meaningless, if it was not issued under rtnl semaphore.
 531  */
 532
 533 /**
 534  *      dev_get -       test if a device exists
 535  *      @name:  name to test for
 536  *
 537  *      Test if a name exists. Returns true if the name is found. In order
 538  *      to be sure the name is not allocated or removed during the test the
 539  *      caller must hold the rtnl semaphore.
 540  *
 541  *      This function exists only for back compatibility with older
 542  *      drivers.
 543  */
 544 int __dev_get(const char *name)
 545 {
 546         struct net_device *dev;
 547
 548         read_lock(&dev_base_lock);
 549         dev = __dev_get_by_name(name);
 550         read_unlock(&dev_base_lock);
 551         return dev != NULL;
 552 }
 553
 554 /**
 555  *      __dev_get_by_index - find a device by its ifindex
 556  *      @ifindex: index of device
 557  *
 558  *      Search for an interface by index. Returns %NULL if the device
 559  *      is not found or a pointer to the device. The device has not
 560  *      had its reference counter increased so the caller must be careful
 561  *      about locking. The caller must hold either the RTNL semaphore
 562  *      or @dev_base_lock.
 563  */
 564
 565 struct net_device *__dev_get_by_index(int ifindex)
 566 {
 567         struct hlist_node *p;
 568
 569         hlist_for_each(p, dev_index_hash(ifindex)) {
 570                 struct net_device *dev
 571                         = hlist_entry(p, struct net_device, index_hlist);
 572                 if (dev->ifindex == ifindex)
 573                         return dev;
 574         }
 575         return NULL;
 576 }
 577
 578
 579 /**
 580  *      dev_get_by_index - find a device by its ifindex
 581  *      @ifindex: index of device
 582  *
 583  *      Search for an interface by index. Returns NULL if the device
 584  *      is not found or a pointer to the device. The device returned has
 585  *      had a reference added and the pointer is safe until the user calls
 586  *      dev_put to indicate they have finished with it.
 587  */
 588
 589 struct net_device *dev_get_by_index(int ifindex)
 590 {
 591         struct net_device *dev;
 592
 593         read_lock(&dev_base_lock);
 594         dev = __dev_get_by_index(ifindex);
 595         if (dev)
 596                 dev_hold(dev);
 597         read_unlock(&dev_base_lock);
 598         return dev;
 599 }
 600
 601 /**
 602  *      dev_getbyhwaddr - find a device by its hardware address
 603  *      @type: media type of device
 604  *      @ha: hardware address
 605  *
 606  *      Search for an interface by MAC address. Returns NULL if the device
 607  *      is not found or a pointer to the device. The caller must hold the
 608  *      rtnl semaphore. The returned device has not had its ref count increased
 609  *      and the caller must therefore be careful about locking
 610  *
 611  *      BUGS:
 612  *      If the API was consistent this would be __dev_get_by_hwaddr
 613  */
 614
 615 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 616 {
 617         struct net_device *dev;
 618
 619         ASSERT_RTNL();
 620
 621         for (dev = dev_base; dev; dev = dev->next)
 622                 if (dev->type == type &&
 623                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 624                         break;
 625         return dev;
 626 }
 627
 628 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
 629 {
 630         struct net_device *dev;
 631
 632         for (dev = dev_base; dev; dev = dev->next)
 633                 if (dev->type == type)
 634                         break;
 635         return dev;
 636 }
 637
 638 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 639
 640 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 641 {
 642         struct net_device *dev;
 643
 644         rtnl_lock();
 645         dev = __dev_getfirstbyhwtype(type);
 646         if (dev)
 647                 dev_hold(dev);
 648         rtnl_unlock();
 649         return dev;
 650 }
 651
 652 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 653
 654 /**
 655  *      dev_get_by_flags - find any device with given flags
 656  *      @if_flags: IFF_* values
 657  *      @mask: bitmask of bits in if_flags to check
 658  *
 659  *      Search for any interface with the given flags. Returns NULL if a device
 660  *      is not found or a pointer to the device. The device returned has
 661  *      had a reference added and the pointer is safe until the user calls
 662  *      dev_put to indicate they have finished with it.
 663  */
 664
 665 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 666 {
 667         struct net_device *dev;
 668
 669         read_lock(&dev_base_lock);
 670         dev = __dev_get_by_flags(if_flags, mask);
 671         if (dev)
 672                 dev_hold(dev);
 673         read_unlock(&dev_base_lock);
 674         return dev;
 675 }
 676
 677 /**
 678  *      __dev_get_by_flags - find any device with given flags
 679  *      @if_flags: IFF_* values
 680  *      @mask: bitmask of bits in if_flags to check
 681  *
 682  *      Search for any interface with the given flags. Returns NULL if a device
 683  *      is not found or a pointer to the device. The caller must hold either
 684  *      the RTNL semaphore or @dev_base_lock.
 685  */
 686
 687 struct net_device *__dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 688 {
 689         struct net_device *dev;
 690
 691         for (dev = dev_base; dev != NULL; dev = dev->next) {
 692                 if (((dev->flags ^ if_flags) & mask) == 0)
 693                         return dev;
 694         }
 695         return NULL;
 696 }
 697
 698 /**
 699  *      dev_valid_name - check if name is okay for network device
 700  *      @name: name string
 701  *
 702  *      Network device names need to be valid file names to
 703  *      to allow sysfs to work
 704  */
 705 int dev_valid_name(const char *name)
 706 {
 707         return !(*name == '\0'
 708                  || !strcmp(name, ".")
 709                  || !strcmp(name, "..")
 710                  || strchr(name, '/'));
 711 }
 712
 713 /**
 714  *      dev_alloc_name - allocate a name for a device
 715  *      @dev: device
 716  *      @name: name format string
 717  *
 718  *      Passed a format string - eg "lt%d" it will try and find a suitable
 719  *      id. Not efficient for many devices, not called a lot. The caller
 720  *      must hold the dev_base or rtnl lock while allocating the name and
 721  *      adding the device in order to avoid duplicates. Returns the number
 722  *      of the unit assigned or a negative errno code.
 723  */
 724
 725 int dev_alloc_name(struct net_device *dev, const char *name)
 726 {
 727         int i = 0;
 728         char buf[IFNAMSIZ];
 729         const char *p;
 730         const int max_netdevices = 8*PAGE_SIZE;
 731         long *inuse;
 732         struct net_device *d;
 733
 734         p = strnchr(name, IFNAMSIZ-1, '%');
 735         if (p) {
 736                 /*
 737                  * Verify the string as this thing may have come from
 738                  * the user.  There must be either one "%d" and no other "%"
 739                  * characters.
 740                  */
 741                 if (p[1] != 'd' || strchr(p + 2, '%'))
 742                         return -EINVAL;
 743
 744                 /* Use one page as a bit array of possible slots */
 745                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 746                 if (!inuse)
 747                         return -ENOMEM;
 748
 749                 for (d = dev_base; d; d = d->next) {
 750                         if (!sscanf(d->name, name, &i))
 751                                 continue;
 752                         if (i < 0 || i >= max_netdevices)
 753                                 continue;
 754
 755                         /*  avoid cases where sscanf is not exact inverse of printf */
 756                         snprintf(buf, sizeof(buf), name, i);
 757                         if (!strncmp(buf, d->name, IFNAMSIZ))
 758                                 set_bit(i, inuse);
 759                 }
 760
 761                 i = find_first_zero_bit(inuse, max_netdevices);
 762                 free_page((unsigned long) inuse);
 763         }
 764
 765         snprintf(buf, sizeof(buf), name, i);
 766         if (!__dev_get_by_name(buf)) {
 767                 strlcpy(dev->name, buf, IFNAMSIZ);
 768                 return i;
 769         }
 770
 771         /* It is possible to run out of possible slots
 772          * when the name is long and there isn't enough space left
 773          * for the digits, or if all bits are used.
 774          */
 775         return -ENFILE;
 776 }
 777
 778
 779 /**
 780  *      dev_change_name - change name of a device
 781  *      @dev: device
 782  *      @newname: name (or format string) must be at least IFNAMSIZ
 783  *
 784  *      Change name of a device, can pass format strings "eth%d".
 785  *      for wildcarding.
 786  */
 787 int dev_change_name(struct net_device *dev, char *newname)
 788 {
 789         int err = 0;
 790
 791         ASSERT_RTNL();
 792
 793         if (dev->flags & IFF_UP)
 794                 return -EBUSY;
 795
 796         if (!dev_valid_name(newname))
 797                 return -EINVAL;
 798
 799         if (strchr(newname, '%')) {
 800                 err = dev_alloc_name(dev, newname);
 801                 if (err < 0)
 802                         return err;
 803                 strcpy(newname, dev->name);
 804         }
 805         else if (__dev_get_by_name(newname))
 806                 return -EEXIST;
 807         else
 808                 strlcpy(dev->name, newname, IFNAMSIZ);
 809
 810         err = class_device_rename(&dev->class_dev, dev->name);
 811         if (!err) {
 812                 hlist_del(&dev->name_hlist);
 813                 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 814                 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 815         }
 816
 817         return err;
 818 }
 819
 820 /**
 821  *      netdev_state_change - device changes state
 822  *      @dev: device to cause notification
 823  *
 824  *      Called to indicate a device has changed state. This function calls
 825  *      the notifier chains for netdev_chain and sends a NEWLINK message
 826  *      to the routing socket.
 827  */
 828 void netdev_state_change(struct net_device *dev)
 829 {
 830         if (dev->flags & IFF_UP) {
 831                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 832                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 833         }
 834 }
 835
 836 /**
 837  *      dev_load        - load a network module
 838  *      @name: name of interface
 839  *
 840  *      If a network interface is not present and the process has suitable
 841  *      privileges this function loads the module. If module loading is not
 842  *      available in this kernel then it becomes a nop.
 843  */
 844
 845 void dev_load(const char *name)
 846 {
 847         struct net_device *dev;
 848
 849         read_lock(&dev_base_lock);
 850         dev = __dev_get_by_name(name);
 851         read_unlock(&dev_base_lock);
 852
 853         if (!dev && capable(CAP_SYS_MODULE))
 854                 request_module("%s", name);
 855 }
 856
 857 static int default_rebuild_header(struct sk_buff *skb)
 858 {
 859         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 860                skb->dev ? skb->dev->name : "NULL!!!");
 861         kfree_skb(skb);
 862         return 1;
 863 }
 864
 865
 866 /*
 867  * Some old buggy device drivers change get_stats after registering
 868  * the device.  Try and trap them here.
 869  * This can be elimnated when all devices are known fixed.
 870  */
 871 static inline int get_stats_changed(struct net_device *dev)
 872 {
 873         int changed = dev->last_stats != dev->get_stats;
 874         dev->last_stats = dev->get_stats;
 875         return changed;
 876 }
 877
 878 /**
 879  *      dev_open        - prepare an interface for use.
 880  *      @dev:   device to open
 881  *
 882  *      Takes a device from down to up state. The device's private open
 883  *      function is invoked and then the multicast lists are loaded. Finally
 884  *      the device is moved into the up state and a %NETDEV_UP message is
 885  *      sent to the netdev notifier chain.
 886  *
 887  *      Calling this function on an active interface is a nop. On a failure
 888  *      a negative errno code is returned.
 889  */
 890 int dev_open(struct net_device *dev)
 891 {
 892         int ret = 0;
 893
 894         /*
 895          *      Is it already up?
 896          */
 897
 898         if (dev->flags & IFF_UP)
 899                 return 0;
 900
 901         /*
 902          *       Check for broken device drivers.
 903          */
 904         if (get_stats_changed(dev) && net_ratelimit()) {
 905                 printk(KERN_ERR "%s: driver changed get_stats after register\n",
 906                        dev->name);
 907         }
 908
 909         /*
 910          *      Is it even present?
 911          */
 912         if (!netif_device_present(dev))
 913                 return -ENODEV;
 914
 915         /*
 916          *      Call device private open method
 917          */
 918         set_bit(__LINK_STATE_START, &dev->state);
 919         if (dev->open) {
 920                 ret = dev->open(dev);
 921                 if (ret)
 922                         clear_bit(__LINK_STATE_START, &dev->state);
 923         }
 924
 925         /*
 926          *      Check for more broken device drivers.
 927          */
 928         if (get_stats_changed(dev) && net_ratelimit()) {
 929                 printk(KERN_ERR "%s: driver changed get_stats in open\n",
 930                        dev->name);
 931         }
 932
 933         /*
 934          *      If it went open OK then:
 935          */
 936
 937         if (!ret) {
 938                 /*
 939                  *      Set the flags.
 940                  */
 941                 dev->flags |= IFF_UP;
 942
 943                 /*
 944                  *      Initialize multicasting status
 945                  */
 946                 dev_mc_upload(dev);
 947
 948                 /*
 949                  *      Wakeup transmit queue engine
 950                  */
 951                 dev_activate(dev);
 952
 953                 /*
 954                  *      ... and announce new interface.
 955                  */
 956                 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 957         }
 958         return ret;
 959 }
 960
 961 /**
 962  *      dev_close - shutdown an interface.
 963  *      @dev: device to shutdown
 964  *
 965  *      This function moves an active device into down state. A
 966  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 967  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 968  *      chain.
 969  */
 970 int dev_close(struct net_device *dev)
 971 {
 972         if (!(dev->flags & IFF_UP))
 973                 return 0;
 974
 975         /*
 976          *      Tell people we are going down, so that they can
 977          *      prepare to death, when device is still operating.
 978          */
 979         notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
 980
 981         dev_deactivate(dev);
 982
 983         clear_bit(__LINK_STATE_START, &dev->state);
 984
 985         /* Synchronize to scheduled poll. We cannot touch poll list,
 986          * it can be even on different cpu. So just clear netif_running(),
 987          * and wait when poll really will happen. Actually, the best place
 988          * for this is inside dev->stop() after device stopped its irq
 989          * engine, but this requires more changes in devices. */
 990
 991         smp_mb__after_clear_bit(); /* Commit netif_running(). */
 992         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
 993                 /* No hurry. */
 994                 current->state = TASK_INTERRUPTIBLE;
 995                 schedule_timeout(1);
 996         }
 997
 998         /*
 999          *      Call the device specific close. This cannot fail.
1000          *      Only if device is UP
1001          *
1002          *      We allow it to be called even after a DETACH hot-plug
1003          *      event.
1004          */
1005         if (dev->stop)
1006                 dev->stop(dev);
1007
1008         /*
1009          *      Device is now down.
1010          */
1011
1012         dev->flags &= ~IFF_UP;
1013
1014         /*
1015          * Tell people we are down
1016          */
1017         notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1018
1019         return 0;
1020 }
1021
1022
1023 /*
1024  *      Device change register/unregister. These are not inline or static
1025  *      as we export them to the world.
1026  */
1027
1028 /**
1029  *      register_netdevice_notifier - register a network notifier block
1030  *      @nb: notifier
1031  *
1032  *      Register a notifier to be called when network device events occur.
1033  *      The notifier passed is linked into the kernel structures and must
1034  *      not be reused until it has been unregistered. A negative errno code
1035  *      is returned on a failure.
1036  *
1037  *      When registered all registration and up events are replayed
1038  *      to the new notifier to allow device to have a race free
1039  *      view of the network device list.
1040  */
1041
1042 int register_netdevice_notifier(struct notifier_block *nb)
1043 {
1044         struct net_device *dev;
1045         int err;
1046
1047         rtnl_lock();
1048         err = notifier_chain_register(&netdev_chain, nb);
1049         if (!err) {
1050                 for (dev = dev_base; dev; dev = dev->next) {
1051                         nb->notifier_call(nb, NETDEV_REGISTER, dev);
1052
1053                         if (dev->flags & IFF_UP)
1054                                 nb->notifier_call(nb, NETDEV_UP, dev);
1055                 }
1056         }
1057         rtnl_unlock();
1058         return err;
1059 }
1060
1061 /**
1062  *      unregister_netdevice_notifier - unregister a network notifier block
1063  *      @nb: notifier
1064  *
1065  *      Unregister a notifier previously registered by
1066  *      register_netdevice_notifier(). The notifier is unlinked into the
1067  *      kernel structures and may then be reused. A negative errno code
1068  *      is returned on a failure.
1069  */
1070
1071 int unregister_netdevice_notifier(struct notifier_block *nb)
1072 {
1073         return notifier_chain_unregister(&netdev_chain, nb);
1074 }
1075
1076 /**
1077  *      call_netdevice_notifiers - call all network notifier blocks
1078  *      @val: value passed unmodified to notifier function
1079  *      @v:   pointer passed unmodified to notifier function
1080  *
1081  *      Call all network notifier blocks.  Parameters and return value
1082  *      are as for notifier_call_chain().
1083  */
1084
1085 int call_netdevice_notifiers(unsigned long val, void *v)
1086 {
1087         return notifier_call_chain(&netdev_chain, val, v);
1088 }
1089
1090 /*
1091  *      Support routine. Sends outgoing frames to any network
1092  *      taps currently in use.
1093  */
1094
1095 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1096 {
1097         struct packet_type *ptype;
1098         net_timestamp(&skb->stamp);
1099
1100         rcu_read_lock();
1101         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1102                 /* Never send packets back to the socket
1103                  * they originated from - MvS (miquels@drinkel.ow.org)
1104                  */
1105                 if ((ptype->dev == dev || !ptype->dev) &&
1106                     (ptype->af_packet_priv == NULL ||
1107                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1108                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1109                         if (!skb2)
1110                                 break;
1111
1112                         /* skb->nh should be correctly
1113                            set by sender, so that the second statement is
1114                            just protection against buggy protocols.
1115                          */
1116                         skb2->mac.raw = skb2->data;
1117
1118                         if (skb2->nh.raw < skb2->data ||
1119                             skb2->nh.raw > skb2->tail) {
1120                                 if (net_ratelimit())
1121                                         printk(KERN_CRIT "protocol %04x is "
1122                                                "buggy, dev %s\n",
1123                                                skb2->protocol, dev->name);
1124                                 skb2->nh.raw = skb2->data;
1125                         }
1126
1127                         skb2->h.raw = skb2->nh.raw;
1128                         skb2->pkt_type = PACKET_OUTGOING;
1129                         ptype->func(skb2, skb->dev, ptype);
1130                 }
1131         }
1132         rcu_read_unlock();
1133 }
1134
1135 /*
1136  * Invalidate hardware checksum when packet is to be mangled, and
1137  * complete checksum manually on outgoing path.
1138  */
1139 int skb_checksum_help(struct sk_buff **pskb, int inward)
1140 {
1141         unsigned int csum;
1142         int ret = 0, offset = (*pskb)->h.raw - (*pskb)->data;
1143
1144         if (inward) {
1145                 (*pskb)->ip_summed = CHECKSUM_NONE;
1146                 goto out;
1147         }
1148
1149         if (skb_shared(*pskb)  || skb_cloned(*pskb)) {
1150                 struct sk_buff *newskb = skb_copy(*pskb, GFP_ATOMIC);
1151                 if (!newskb) {
1152                         ret = -ENOMEM;
1153                         goto out;
1154                 }
1155                 if ((*pskb)->sk)
1156                         skb_set_owner_w(newskb, (*pskb)->sk);
1157                 kfree_skb(*pskb);
1158                 *pskb = newskb;
1159         }
1160
1161         if (offset > (int)(*pskb)->len)
1162                 BUG();
1163         csum = skb_checksum(*pskb, offset, (*pskb)->len-offset, 0);
1164
1165         offset = (*pskb)->tail - (*pskb)->h.raw;
1166         if (offset <= 0)
1167                 BUG();
1168         if ((*pskb)->csum + 2 > offset)
1169                 BUG();
1170
1171         *(u16*)((*pskb)->h.raw + (*pskb)->csum) = csum_fold(csum);
1172         (*pskb)->ip_summed = CHECKSUM_NONE;
1173 out:
1174         return ret;
1175 }
1176
1177 #ifdef CONFIG_HIGHMEM
1178 /* Actually, we should eliminate this check as soon as we know, that:
1179  * 1. IOMMU is present and allows to map all the memory.
1180  * 2. No high memory really exists on this machine.
1181  */
1182
1183 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1184 {
1185         int i;
1186
1187         if (dev->features & NETIF_F_HIGHDMA)
1188                 return 0;
1189
1190         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1191                 if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
1192                         return 1;
1193
1194         return 0;
1195 }
1196 #else
1197 #define illegal_highdma(dev, skb)       (0)
1198 #endif
1199
1200 extern void skb_release_data(struct sk_buff *);
1201
1202 /* Keep head the same: replace data */
1203 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1204 {
1205         unsigned int size;
1206         u8 *data;
1207         long offset;
1208         struct skb_shared_info *ninfo;
1209         int headerlen = skb->data - skb->head;
1210         int expand = (skb->tail + skb->data_len) - skb->end;
1211
1212         if (skb_shared(skb))
1213                 BUG();
1214
1215         if (expand <= 0)
1216                 expand = 0;
1217
1218         size = skb->end - skb->head + expand;
1219         size = SKB_DATA_ALIGN(size);
1220         data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1221         if (!data)
1222                 return -ENOMEM;
1223
1224         /* Copy entire thing */
1225         if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1226                 BUG();
1227
1228         /* Set up shinfo */
1229         ninfo = (struct skb_shared_info*)(data + size);
1230         atomic_set(&ninfo->dataref, 1);
1231         ninfo->tso_size = skb_shinfo(skb)->tso_size;
1232         ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1233         ninfo->nr_frags = 0;
1234         ninfo->frag_list = NULL;
1235
1236         /* Offset between the two in bytes */
1237         offset = data - skb->head;
1238
1239         /* Free old data. */
1240         skb_release_data(skb);
1241
1242         skb->head = data;
1243         skb->end  = data + size;
1244
1245         /* Set up new pointers */
1246         skb->h.raw   += offset;
1247         skb->nh.raw  += offset;
1248         skb->mac.raw += offset;
1249         skb->tail    += offset;
1250         skb->data    += offset;
1251
1252         /* We are no longer a clone, even if we were. */
1253         skb->cloned    = 0;
1254
1255         skb->tail     += skb->data_len;
1256         skb->data_len  = 0;
1257         return 0;
1258 }
1259
1260 #define HARD_TX_LOCK_BH(dev, cpu) {                     \
1261         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1262                 spin_lock_bh(&dev->xmit_lock);          \
1263                 dev->xmit_lock_owner = cpu;             \
1264         }                                               \
1265 }
1266
1267 #define HARD_TX_UNLOCK_BH(dev) {                        \
1268         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1269                 dev->xmit_lock_owner = -1;              \
1270                 spin_unlock_bh(&dev->xmit_lock);        \
1271         }                                               \
1272 }
1273
1274 static inline void qdisc_run(struct net_device *dev)
1275 {
1276         while (!netif_queue_stopped(dev) &&
1277                qdisc_restart(dev)<0)
1278                 /* NOTHING */;
1279 }
1280
1281 /**
1282  *      dev_queue_xmit - transmit a buffer
1283  *      @skb: buffer to transmit
1284  *
1285  *      Queue a buffer for transmission to a network device. The caller must
1286  *      have set the device and priority and built the buffer before calling
1287  *      this function. The function can be called from an interrupt.
1288  *
1289  *      A negative errno code is returned on a failure. A success does not
1290  *      guarantee the frame will be transmitted as it may be dropped due
1291  *      to congestion or traffic shaping.
1292  */
1293
1294 int dev_queue_xmit(struct sk_buff *skb)
1295 {
1296         struct net_device *dev = skb->dev;
1297         struct Qdisc *q;
1298         int rc = -ENOMEM;
1299
1300         if (skb_shinfo(skb)->frag_list &&
1301             !(dev->features & NETIF_F_FRAGLIST) &&
1302             __skb_linearize(skb, GFP_ATOMIC))
1303                 goto out_kfree_skb;
1304
1305         /* Fragmented skb is linearized if device does not support SG,
1306          * or if at least one of fragments is in highmem and device
1307          * does not support DMA from it.
1308          */
1309         if (skb_shinfo(skb)->nr_frags &&
1310             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1311             __skb_linearize(skb, GFP_ATOMIC))
1312                 goto out_kfree_skb;
1313
1314         /* If packet is not checksummed and device does not support
1315          * checksumming for this protocol, complete checksumming here.
1316          */
1317         if (skb->ip_summed == CHECKSUM_HW &&
1318             (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1319              (!(dev->features & NETIF_F_IP_CSUM) ||
1320               skb->protocol != htons(ETH_P_IP))))
1321                 if (skb_checksum_help(&skb, 0))
1322                         goto out_kfree_skb;
1323
1324         rcu_read_lock();
1325         /* Updates of qdisc are serialized by queue_lock.
1326          * The struct Qdisc which is pointed to by qdisc is now a
1327          * rcu structure - it may be accessed without acquiring
1328          * a lock (but the structure may be stale.) The freeing of the
1329          * qdisc will be deferred until it's known that there are no
1330          * more references to it.
1331          *
1332          * If the qdisc has an enqueue function, we still need to
1333          * hold the queue_lock before calling it, since queue_lock
1334          * also serializes access to the device queue.
1335          */
1336
1337         q = dev->qdisc;
1338         smp_read_barrier_depends();
1339 #ifdef CONFIG_NET_CLS_ACT
1340         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1341 #endif
1342         if (q->enqueue) {
1343                 /* Grab device queue */
1344                 spin_lock_bh(&dev->queue_lock);
1345
1346                 rc = q->enqueue(skb, q);
1347
1348                 qdisc_run(dev);
1349
1350                 spin_unlock_bh(&dev->queue_lock);
1351                 rcu_read_unlock();
1352                 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1353                 goto out;
1354         }
1355         rcu_read_unlock();
1356
1357         /* The device has no queue. Common case for software devices:
1358            loopback, all the sorts of tunnels...
1359
1360            Really, it is unlikely that xmit_lock protection is necessary here.
1361            (f.e. loopback and IP tunnels are clean ignoring statistics
1362            counters.)
1363            However, it is possible, that they rely on protection
1364            made by us here.
1365
1366            Check this and shot the lock. It is not prone from deadlocks.
1367            Either shot noqueue qdisc, it is even simpler 8)
1368          */
1369         if (dev->flags & IFF_UP) {
1370                 int cpu = get_cpu();
1371
1372                 if (dev->xmit_lock_owner != cpu) {
1373
1374                         HARD_TX_LOCK_BH(dev, cpu);
1375                         put_cpu();
1376
1377                         if (!netif_queue_stopped(dev)) {
1378                                 if (netdev_nit)
1379                                         dev_queue_xmit_nit(skb, dev);
1380
1381                                 rc = 0;
1382                                 if (!dev->hard_start_xmit(skb, dev)) {
1383                                         HARD_TX_UNLOCK_BH(dev);
1384                                         goto out;
1385                                 }
1386                         }
1387                         HARD_TX_UNLOCK_BH(dev);
1388                         if (net_ratelimit())
1389                                 printk(KERN_CRIT "Virtual device %s asks to "
1390                                        "queue packet!\n", dev->name);
1391                         goto out_enetdown;
1392                 } else {
1393                         put_cpu();
1394                         /* Recursion is detected! It is possible,
1395                          * unfortunately */
1396                         if (net_ratelimit())
1397                                 printk(KERN_CRIT "Dead loop on virtual device "
1398                                        "%s, fix it urgently!\n", dev->name);
1399                 }
1400         }
1401 out_enetdown:
1402         rc = -ENETDOWN;
1403 out_kfree_skb:
1404         kfree_skb(skb);
1405 out:
1406         return rc;
1407 }
1408
1409
1410 /*=======================================================================
1411                         Receiver routines
1412   =======================================================================*/
1413
1414 int netdev_max_backlog = 300;
1415 int weight_p = 64;            /* old backlog weight */
1416 /* These numbers are selected based on intuition and some
1417  * experimentatiom, if you have more scientific way of doing this
1418  * please go ahead and fix things.
1419  */
1420 int no_cong_thresh = 10;
1421 int no_cong = 20;
1422 int lo_cong = 100;
1423 int mod_cong = 290;
1424
1425 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1426
1427
1428 #ifdef CONFIG_NET_HW_FLOWCONTROL
1429 atomic_t netdev_dropping = ATOMIC_INIT(0);
1430 static unsigned long netdev_fc_mask = 1;
1431 unsigned long netdev_fc_xoff;
1432 spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
1433
1434 static struct
1435 {
1436         void (*stimul)(struct net_device *);
1437         struct net_device *dev;
1438 } netdev_fc_slots[BITS_PER_LONG];
1439
1440 int netdev_register_fc(struct net_device *dev,
1441                        void (*stimul)(struct net_device *dev))
1442 {
1443         int bit = 0;
1444         unsigned long flags;
1445
1446         spin_lock_irqsave(&netdev_fc_lock, flags);
1447         if (netdev_fc_mask != ~0UL) {
1448                 bit = ffz(netdev_fc_mask);
1449                 netdev_fc_slots[bit].stimul = stimul;
1450                 netdev_fc_slots[bit].dev = dev;
1451                 set_bit(bit, &netdev_fc_mask);
1452                 clear_bit(bit, &netdev_fc_xoff);
1453         }
1454         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1455         return bit;
1456 }
1457
1458 void netdev_unregister_fc(int bit)
1459 {
1460         unsigned long flags;
1461
1462         spin_lock_irqsave(&netdev_fc_lock, flags);
1463         if (bit > 0) {
1464                 netdev_fc_slots[bit].stimul = NULL;
1465                 netdev_fc_slots[bit].dev = NULL;
1466                 clear_bit(bit, &netdev_fc_mask);
1467                 clear_bit(bit, &netdev_fc_xoff);
1468         }
1469         spin_unlock_irqrestore(&netdev_fc_lock, flags);
1470 }
1471
1472 static void netdev_wakeup(void)
1473 {
1474         unsigned long xoff;
1475
1476         spin_lock(&netdev_fc_lock);
1477         xoff = netdev_fc_xoff;
1478         netdev_fc_xoff = 0;
1479         while (xoff) {
1480                 int i = ffz(~xoff);
1481                 xoff &= ~(1 << i);
1482                 netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
1483         }
1484         spin_unlock(&netdev_fc_lock);
1485 }
1486 #endif
1487
1488 static void get_sample_stats(int cpu)
1489 {
1490 #ifdef RAND_LIE
1491         unsigned long rd;
1492         int rq;
1493 #endif
1494         struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1495         int blog = sd->input_pkt_queue.qlen;
1496         int avg_blog = sd->avg_blog;
1497
1498         avg_blog = (avg_blog >> 1) + (blog >> 1);
1499
1500         if (avg_blog > mod_cong) {
1501                 /* Above moderate congestion levels. */
1502                 sd->cng_level = NET_RX_CN_HIGH;
1503 #ifdef RAND_LIE
1504                 rd = net_random();
1505                 rq = rd % netdev_max_backlog;
1506                 if (rq < avg_blog) /* unlucky bastard */
1507                         sd->cng_level = NET_RX_DROP;
1508 #endif
1509         } else if (avg_blog > lo_cong) {
1510                 sd->cng_level = NET_RX_CN_MOD;
1511 #ifdef RAND_LIE
1512                 rd = net_random();
1513                 rq = rd % netdev_max_backlog;
1514                         if (rq < avg_blog) /* unlucky bastard */
1515                                 sd->cng_level = NET_RX_CN_HIGH;
1516 #endif
1517         } else if (avg_blog > no_cong)
1518                 sd->cng_level = NET_RX_CN_LOW;
1519         else  /* no congestion */
1520                 sd->cng_level = NET_RX_SUCCESS;
1521
1522         sd->avg_blog = avg_blog;
1523 }
1524
1525 #ifdef OFFLINE_SAMPLE
1526 static void sample_queue(unsigned long dummy)
1527 {
1528 /* 10 ms 0r 1ms -- i don't care -- JHS */
1529         int next_tick = 1;
1530         int cpu = smp_processor_id();
1531
1532         get_sample_stats(cpu);
1533         next_tick += jiffies;
1534         mod_timer(&samp_timer, next_tick);
1535 }
1536 #endif
1537
1538
1539 /**
1540  *      netif_rx        -       post buffer to the network code
1541  *      @skb: buffer to post
1542  *
1543  *      This function receives a packet from a device driver and queues it for
1544  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1545  *      may be dropped during processing for congestion control or by the
1546  *      protocol layers.
1547  *
1548  *      return values:
1549  *      NET_RX_SUCCESS  (no congestion)
1550  *      NET_RX_CN_LOW   (low congestion)
1551  *      NET_RX_CN_MOD   (moderate congestion)
1552  *      NET_RX_CN_HIGH  (high congestion)
1553  *      NET_RX_DROP     (packet was dropped)
1554  *
1555  */
1556
1557 int netif_rx(struct sk_buff *skb)
1558 {
1559         int this_cpu;
1560         struct softnet_data *queue;
1561         unsigned long flags;
1562
1563 #ifdef CONFIG_NETPOLL
1564         if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
1565                 kfree_skb(skb);
1566                 return NET_RX_DROP;
1567         }
1568 #endif
1569
1570         if (!skb->stamp.tv_sec)
1571                 net_timestamp(&skb->stamp);
1572
1573         /*
1574          * The code is rearranged so that the path is the most
1575          * short when CPU is congested, but is still operating.
1576          */
1577         local_irq_save(flags);
1578         this_cpu = smp_processor_id();
1579         queue = &__get_cpu_var(softnet_data);
1580
1581         __get_cpu_var(netdev_rx_stat).total++;
1582         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1583                 if (queue->input_pkt_queue.qlen) {
1584                         if (queue->throttle)
1585                                 goto drop;
1586
1587 enqueue:
1588                         dev_hold(skb->dev);
1589                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1590 #ifndef OFFLINE_SAMPLE
1591                         get_sample_stats(this_cpu);
1592 #endif
1593                         local_irq_restore(flags);
1594                         return queue->cng_level;
1595                 }
1596
1597                 if (queue->throttle) {
1598                         queue->throttle = 0;
1599 #ifdef CONFIG_NET_HW_FLOWCONTROL
1600                         if (atomic_dec_and_test(&netdev_dropping))
1601                                 netdev_wakeup();
1602 #endif
1603                 }
1604
1605                 netif_rx_schedule(&queue->backlog_dev);
1606                 goto enqueue;
1607         }
1608
1609         if (!queue->throttle) {
1610                 queue->throttle = 1;
1611                 __get_cpu_var(netdev_rx_stat).throttled++;
1612 #ifdef CONFIG_NET_HW_FLOWCONTROL
1613                 atomic_inc(&netdev_dropping);
1614 #endif
1615         }
1616
1617 drop:
1618         __get_cpu_var(netdev_rx_stat).dropped++;
1619         local_irq_restore(flags);
1620
1621         kfree_skb(skb);
1622         return NET_RX_DROP;
1623 }
1624
1625 static __inline__ void skb_bond(struct sk_buff *skb)
1626 {
1627         struct net_device *dev = skb->dev;
1628
1629         if (dev->master) {
1630                 skb->real_dev = skb->dev;
1631                 skb->dev = dev->master;
1632         }
1633 }
1634
1635 static void net_tx_action(struct softirq_action *h)
1636 {
1637         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1638
1639         if (sd->completion_queue) {
1640                 struct sk_buff *clist;
1641
1642                 local_irq_disable();
1643                 clist = sd->completion_queue;
1644                 sd->completion_queue = NULL;
1645                 local_irq_enable();
1646
1647                 while (clist) {
1648                         struct sk_buff *skb = clist;
1649                         clist = clist->next;
1650
1651                         BUG_TRAP(!atomic_read(&skb->users));
1652                         __kfree_skb(skb);
1653                 }
1654         }
1655
1656         if (sd->output_queue) {
1657                 struct net_device *head;
1658
1659                 local_irq_disable();
1660                 head = sd->output_queue;
1661                 sd->output_queue = NULL;
1662                 local_irq_enable();
1663
1664                 while (head) {
1665                         struct net_device *dev = head;
1666                         head = head->next_sched;
1667
1668                         smp_mb__before_clear_bit();
1669                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1670
1671                         if (spin_trylock(&dev->queue_lock)) {
1672                                 qdisc_run(dev);
1673                                 spin_unlock(&dev->queue_lock);
1674                         } else {
1675                                 netif_schedule(dev);
1676                         }
1677                 }
1678         }
1679 }
1680
1681 static __inline__ int deliver_skb(struct sk_buff *skb,
1682                                   struct packet_type *pt_prev, int last)
1683 {
1684         atomic_inc(&skb->users);
1685         return pt_prev->func(skb, skb->dev, pt_prev);
1686 }
1687
1688
1689 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1690 int (*br_handle_frame_hook)(struct sk_buff *skb);
1691
1692 static __inline__ int handle_bridge(struct sk_buff *skb,
1693                                      struct packet_type *pt_prev)
1694 {
1695         int ret = NET_RX_DROP;
1696         if (pt_prev)
1697                 ret = deliver_skb(skb, pt_prev, 0);
1698
1699         return ret;
1700 }
1701
1702 #endif
1703
1704 static inline int __handle_bridge(struct sk_buff *skb,
1705                         struct packet_type **pt_prev, int *ret)
1706 {
1707 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1708         if (skb->dev->br_port && skb->pkt_type != PACKET_LOOPBACK) {
1709                 *ret = handle_bridge(skb, *pt_prev);
1710                 if (br_handle_frame_hook(skb) == 0)
1711                         return 1;
1712
1713                 *pt_prev = NULL;
1714         }
1715 #endif
1716         return 0;
1717 }
1718
1719
1720 #ifdef CONFIG_NET_CLS_ACT
1721 /* TODO: Maybe we should just force sch_ingress to be compiled in
1722  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1723  * a compare and 2 stores extra right now if we dont have it on
1724  * but have CONFIG_NET_CLS_ACT
1725  * NOTE: This doesnt stop any functionality; if you dont have
1726  * the ingress scheduler, you just cant add policies on ingress.
1727  *
1728  */
1729 int ing_filter(struct sk_buff *skb)
1730 {
1731         struct Qdisc *q;
1732         struct net_device *dev = skb->dev;
1733         int result = TC_ACT_OK;
1734
1735         if (dev->qdisc_ingress) {
1736                 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1737                 if (MAX_RED_LOOP < ttl++) {
1738                         printk("Redir loop detected Dropping packet (%s->%s)\n",
1739                                 skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
1740                         return TC_ACT_SHOT;
1741                 }
1742
1743                 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1744
1745                 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1746                 if (NULL == skb->input_dev) {
1747                         skb->input_dev = skb->dev;
1748                         printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
1749                 }
1750                 spin_lock(&dev->ingress_lock);
1751                 if ((q = dev->qdisc_ingress) != NULL)
1752                         result = q->enqueue(skb, q);
1753                 spin_unlock(&dev->ingress_lock);
1754
1755         }
1756
1757         return result;
1758 }
1759 #endif
1760
1761 int netif_receive_skb(struct sk_buff *skb)
1762 {
1763         struct packet_type *ptype, *pt_prev;
1764         int ret = NET_RX_DROP;
1765         unsigned short type;
1766
1767 #ifdef CONFIG_NETPOLL
1768         if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
1769                 kfree_skb(skb);
1770                 return NET_RX_DROP;
1771         }
1772 #endif
1773
1774         if (!skb->stamp.tv_sec)
1775                 net_timestamp(&skb->stamp);
1776
1777         skb_bond(skb);
1778
1779         __get_cpu_var(netdev_rx_stat).total++;
1780
1781         skb->h.raw = skb->nh.raw = skb->data;
1782         skb->mac_len = skb->nh.raw - skb->mac.raw;
1783
1784         pt_prev = NULL;
1785 #ifdef CONFIG_NET_CLS_ACT
1786         if (skb->tc_verd & TC_NCLS) {
1787                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1788                 rcu_read_lock();
1789                 goto ncls;
1790         }
1791  #endif
1792
1793         rcu_read_lock();
1794         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1795                 if (!ptype->dev || ptype->dev == skb->dev) {
1796                         if (pt_prev)
1797                                 ret = deliver_skb(skb, pt_prev, 0);
1798                         pt_prev = ptype;
1799                 }
1800         }
1801
1802 #ifdef CONFIG_NET_CLS_ACT
1803         if (pt_prev) {
1804                 atomic_inc(&skb->users);
1805                 ret = pt_prev->func(skb, skb->dev, pt_prev);
1806                 pt_prev = NULL; /* noone else should process this after*/
1807         } else {
1808                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1809         }
1810
1811         ret = ing_filter(skb);
1812
1813         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1814                 kfree_skb(skb);
1815                 goto out;
1816         }
1817
1818         skb->tc_verd = 0;
1819 ncls:
1820 #endif
1821
1822         handle_diverter(skb);
1823
1824         if (__handle_bridge(skb, &pt_prev, &ret))
1825                 goto out;
1826
1827         type = skb->protocol;
1828         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1829                 if (ptype->type == type &&
1830                     (!ptype->dev || ptype->dev == skb->dev)) {
1831                         if (pt_prev)
1832                                 ret = deliver_skb(skb, pt_prev, 0);
1833                         pt_prev = ptype;
1834                 }
1835         }
1836
1837         if (pt_prev) {
1838                 ret = pt_prev->func(skb, skb->dev, pt_prev);
1839         } else {
1840                 kfree_skb(skb);
1841                 /* Jamal, now you will not able to escape explaining
1842                  * me how you were going to use this. :-)
1843                  */
1844                 ret = NET_RX_DROP;
1845         }
1846
1847 out:
1848         rcu_read_unlock();
1849         return ret;
1850 }
1851
1852 static int process_backlog(struct net_device *backlog_dev, int *budget)
1853 {
1854         int work = 0;
1855         int quota = min(backlog_dev->quota, *budget);
1856         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1857         unsigned long start_time = jiffies;
1858
1859         for (;;) {
1860                 struct sk_buff *skb;
1861                 struct net_device *dev;
1862
1863                 local_irq_disable();
1864                 skb = __skb_dequeue(&queue->input_pkt_queue);
1865                 if (!skb)
1866                         goto job_done;
1867                 local_irq_enable();
1868
1869                 dev = skb->dev;
1870
1871                 netif_receive_skb(skb);
1872
1873                 dev_put(dev);
1874
1875                 work++;
1876
1877                 if (work >= quota || jiffies - start_time > 1)
1878                         break;
1879
1880 #ifdef CONFIG_NET_HW_FLOWCONTROL
1881                 if (queue->throttle &&
1882                     queue->input_pkt_queue.qlen < no_cong_thresh ) {
1883                         queue->throttle = 0;
1884                         if (atomic_dec_and_test(&netdev_dropping)) {
1885                                 netdev_wakeup();
1886                                 break;
1887                         }
1888                 }
1889 #endif
1890         }
1891
1892         backlog_dev->quota -= work;
1893         *budget -= work;
1894         return -1;
1895
1896 job_done:
1897         backlog_dev->quota -= work;
1898         *budget -= work;
1899
1900         list_del(&backlog_dev->poll_list);
1901         smp_mb__before_clear_bit();
1902         netif_poll_enable(backlog_dev);
1903
1904         if (queue->throttle) {
1905                 queue->throttle = 0;
1906 #ifdef CONFIG_NET_HW_FLOWCONTROL
1907                 if (atomic_dec_and_test(&netdev_dropping))
1908                         netdev_wakeup();
1909 #endif
1910         }
1911         local_irq_enable();
1912         return 0;
1913 }
1914
1915 static void net_rx_action(struct softirq_action *h)
1916 {
1917         struct softnet_data *queue = &__get_cpu_var(softnet_data);
1918         unsigned long start_time = jiffies;
1919         int budget = netdev_max_backlog;
1920
1921
1922         local_irq_disable();
1923
1924         while (!list_empty(&queue->poll_list)) {
1925                 struct net_device *dev;
1926
1927                 if (budget <= 0 || jiffies - start_time > 1)
1928                         goto softnet_break;
1929
1930                 local_irq_enable();
1931
1932                 dev = list_entry(queue->poll_list.next,
1933                                  struct net_device, poll_list);
1934
1935                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1936                         local_irq_disable();
1937                         list_del(&dev->poll_list);
1938                         list_add_tail(&dev->poll_list, &queue->poll_list);
1939                         if (dev->quota < 0)
1940                                 dev->quota += dev->weight;
1941                         else
1942                                 dev->quota = dev->weight;
1943                 } else {
1944                         dev_put(dev);
1945                         local_irq_disable();
1946                 }
1947         }
1948 out:
1949         local_irq_enable();
1950         return;
1951
1952 softnet_break:
1953         __get_cpu_var(netdev_rx_stat).time_squeeze++;
1954         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1955         goto out;
1956 }
1957
1958 static gifconf_func_t * gifconf_list [NPROTO];
1959
1960 /**
1961  *      register_gifconf        -       register a SIOCGIF handler
1962  *      @family: Address family
1963  *      @gifconf: Function handler
1964  *
1965  *      Register protocol dependent address dumping routines. The handler
1966  *      that is passed must not be freed or reused until it has been replaced
1967  *      by another handler.
1968  */
1969 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1970 {
1971         if (family >= NPROTO)
1972                 return -EINVAL;
1973         gifconf_list[family] = gifconf;
1974         return 0;
1975 }
1976
1977
1978 /*
1979  *      Map an interface index to its name (SIOCGIFNAME)
1980  */
1981
1982 /*
1983  *      We need this ioctl for efficient implementation of the
1984  *      if_indextoname() function required by the IPv6 API.  Without
1985  *      it, we would have to search all the interfaces to find a
1986  *      match.  --pb
1987  */
1988
1989 static int dev_ifname(struct ifreq __user *arg)
1990 {
1991         struct net_device *dev;
1992         struct ifreq ifr;
1993
1994         /*
1995          *      Fetch the caller's info block.
1996          */
1997
1998         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1999                 return -EFAULT;
2000
2001         read_lock(&dev_base_lock);
2002         dev = __dev_get_by_index(ifr.ifr_ifindex);
2003         if (!dev) {
2004                 read_unlock(&dev_base_lock);
2005                 return -ENODEV;
2006         }
2007
2008         strcpy(ifr.ifr_name, dev->name);
2009         read_unlock(&dev_base_lock);
2010
2011         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2012                 return -EFAULT;
2013         return 0;
2014 }
2015
2016 /*
2017  *      Perform a SIOCGIFCONF call. This structure will change
2018  *      size eventually, and there is nothing I can do about it.
2019  *      Thus we will need a 'compatibility mode'.
2020  */
2021
2022 static int dev_ifconf(char __user *arg)
2023 {
2024         struct ifconf ifc;
2025         struct net_device *dev;
2026         char __user *pos;
2027         int len;
2028         int total;
2029         int i;
2030
2031         /*
2032          *      Fetch the caller's info block.
2033          */
2034
2035         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2036                 return -EFAULT;
2037
2038         pos = ifc.ifc_buf;
2039         len = ifc.ifc_len;
2040
2041         /*
2042          *      Loop over the interfaces, and write an info block for each.
2043          */
2044
2045         total = 0;
2046         for (dev = dev_base; dev; dev = dev->next) {
2047                 for (i = 0; i < NPROTO; i++) {
2048                         if (gifconf_list[i]) {
2049                                 int done;
2050                                 if (!pos)
2051                                         done = gifconf_list[i](dev, NULL, 0);
2052                                 else
2053                                         done = gifconf_list[i](dev, pos + total,
2054                                                                len - total);
2055                                 if (done < 0)
2056                                         return -EFAULT;
2057                                 total += done;
2058                         }
2059                 }
2060         }
2061
2062         /*
2063          *      All done.  Write the updated control block back to the caller.
2064          */
2065         ifc.ifc_len = total;
2066
2067         /*
2068          *      Both BSD and Solaris return 0 here, so we do too.
2069          */
2070         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2071 }
2072
2073 #ifdef CONFIG_PROC_FS
2074 /*
2075  *      This is invoked by the /proc filesystem handler to display a device
2076  *      in detail.
2077  */
2078 static __inline__ struct net_device *dev_get_idx(loff_t pos)
2079 {
2080         struct net_device *dev;
2081         loff_t i;
2082
2083         for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2084
2085         return i == pos ? dev : NULL;
2086 }
2087
2088 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2089 {
2090         read_lock(&dev_base_lock);
2091         return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2092 }
2093
2094 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2095 {
2096         ++*pos;
2097         return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2098 }
2099
2100 void dev_seq_stop(struct seq_file *seq, void *v)
2101 {
2102         read_unlock(&dev_base_lock);
2103 }
2104
2105 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2106 {
2107         if (dev->get_stats) {
2108                 struct net_device_stats *stats = dev->get_stats(dev);
2109
2110                 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2111                                 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2112                            dev->name, stats->rx_bytes, stats->rx_packets,
2113                            stats->rx_errors,
2114                            stats->rx_dropped + stats->rx_missed_errors,
2115                            stats->rx_fifo_errors,
2116                            stats->rx_length_errors + stats->rx_over_errors +
2117                              stats->rx_crc_errors + stats->rx_frame_errors,
2118                            stats->rx_compressed, stats->multicast,
2119                            stats->tx_bytes, stats->tx_packets,
2120                            stats->tx_errors, stats->tx_dropped,
2121                            stats->tx_fifo_errors, stats->collisions,
2122                            stats->tx_carrier_errors +
2123                              stats->tx_aborted_errors +
2124                              stats->tx_window_errors +
2125                              stats->tx_heartbeat_errors,
2126                            stats->tx_compressed);
2127         } else
2128                 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2129 }
2130
2131 /*
2132  *      Called from the PROCfs module. This now uses the new arbitrary sized
2133  *      /proc/net interface to create /proc/net/dev
2134  */
2135 static int dev_seq_show(struct seq_file *seq, void *v)
2136 {
2137         if (v == SEQ_START_TOKEN)
2138                 seq_puts(seq, "Inter-|   Receive                            "
2139                               "                    |  Transmit\n"
2140                               " face |bytes    packets errs drop fifo frame "
2141                               "compressed multicast|bytes    packets errs "
2142                               "drop fifo colls carrier compressed\n");
2143         else
2144                 dev_seq_printf_stats(seq, v);
2145         return 0;
2146 }
2147
2148 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2149 {
2150         struct netif_rx_stats *rc = NULL;
2151
2152         while (*pos < NR_CPUS)
2153                 if (cpu_online(*pos)) {
2154                         rc = &per_cpu(netdev_rx_stat, *pos);
2155                         break;
2156                 } else
2157                         ++*pos;
2158         return rc;
2159 }
2160
2161 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2162 {
2163         return softnet_get_online(pos);
2164 }
2165
2166 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2167 {
2168         ++*pos;
2169         return softnet_get_online(pos);
2170 }
2171
2172 static void softnet_seq_stop(struct seq_file *seq, void *v)
2173 {
2174 }
2175
2176 static int softnet_seq_show(struct seq_file *seq, void *v)
2177 {
2178         struct netif_rx_stats *s = v;
2179
2180         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2181                    s->total, s->dropped, s->time_squeeze, s->throttled,
2182                    s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
2183                    s->fastroute_deferred_out,
2184 #if 0
2185                    s->fastroute_latency_reduction
2186 #else
2187                    s->cpu_collision
2188 #endif
2189                   );
2190         return 0;
2191 }
2192
2193 static struct seq_operations dev_seq_ops = {
2194         .start = dev_seq_start,
2195         .next  = dev_seq_next,
2196         .stop  = dev_seq_stop,
2197         .show  = dev_seq_show,
2198 };
2199
2200 static int dev_seq_open(struct inode *inode, struct file *file)
2201 {
2202         return seq_open(file, &dev_seq_ops);
2203 }
2204
2205 static struct file_operations dev_seq_fops = {
2206         .owner   = THIS_MODULE,
2207         .open    = dev_seq_open,
2208         .read    = seq_read,
2209         .llseek  = seq_lseek,
2210         .release = seq_release,
2211 };
2212
2213 static struct seq_operations softnet_seq_ops = {
2214         .start = softnet_seq_start,
2215         .next  = softnet_seq_next,
2216         .stop  = softnet_seq_stop,
2217         .show  = softnet_seq_show,
2218 };
2219
2220 static int softnet_seq_open(struct inode *inode, struct file *file)
2221 {
2222         return seq_open(file, &softnet_seq_ops);
2223 }
2224
2225 static struct file_operations softnet_seq_fops = {
2226         .owner   = THIS_MODULE,
2227         .open    = softnet_seq_open,
2228         .read    = seq_read,
2229         .llseek  = seq_lseek,
2230         .release = seq_release,
2231 };
2232
2233 #ifdef WIRELESS_EXT
2234 extern int wireless_proc_init(void);
2235 #else
2236 #define wireless_proc_init() 0
2237 #endif
2238
2239 static int __init dev_proc_init(void)
2240 {
2241         int rc = -ENOMEM;
2242
2243         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2244                 goto out;
2245         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2246                 goto out_dev;
2247         if (wireless_proc_init())
2248                 goto out_softnet;
2249         rc = 0;
2250 out:
2251         return rc;
2252 out_softnet:
2253         proc_net_remove("softnet_stat");
2254 out_dev:
2255         proc_net_remove("dev");
2256         goto out;
2257 }
2258 #else
2259 #define dev_proc_init() 0
2260 #endif  /* CONFIG_PROC_FS */
2261
2262
2263 /**
2264  *      netdev_set_master       -       set up master/slave pair
2265  *      @slave: slave device
2266  *      @master: new master device
2267  *
2268  *      Changes the master device of the slave. Pass %NULL to break the
2269  *      bonding. The caller must hold the RTNL semaphore. On a failure
2270  *      a negative errno code is returned. On success the reference counts
2271  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2272  *      function returns zero.
2273  */
2274 int netdev_set_master(struct net_device *slave, struct net_device *master)
2275 {
2276         struct net_device *old = slave->master;
2277
2278         ASSERT_RTNL();
2279
2280         if (master) {
2281                 if (old)
2282                         return -EBUSY;
2283                 dev_hold(master);
2284         }
2285
2286         slave->master = master;
2287
2288         synchronize_net();
2289
2290         if (old)
2291                 dev_put(old);
2292
2293         if (master)
2294                 slave->flags |= IFF_SLAVE;
2295         else
2296                 slave->flags &= ~IFF_SLAVE;
2297
2298         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2299         return 0;
2300 }
2301
2302 /**
2303  *      dev_set_promiscuity     - update promiscuity count on a device
2304  *      @dev: device
2305  *      @inc: modifier
2306  *
2307  *      Add or remove promsicuity from a device. While the count in the device
2308  *      remains above zero the interface remains promiscuous. Once it hits zero
2309  *      the device reverts back to normal filtering operation. A negative inc
2310  *      value is used to drop promiscuity on the device.
2311  */
2312 void dev_set_promiscuity(struct net_device *dev, int inc)
2313 {
2314         unsigned short old_flags = dev->flags;
2315
2316         dev->flags |= IFF_PROMISC;
2317         if ((dev->promiscuity += inc) == 0)
2318                 dev->flags &= ~IFF_PROMISC;
2319         if (dev->flags ^ old_flags) {
2320                 dev_mc_upload(dev);
2321                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2322                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2323                                                                "left");
2324         }
2325 }
2326
2327 /**
2328  *      dev_set_allmulti        - update allmulti count on a device
2329  *      @dev: device
2330  *      @inc: modifier
2331  *
2332  *      Add or remove reception of all multicast frames to a device. While the
2333  *      count in the device remains above zero the interface remains listening
2334  *      to all interfaces. Once it hits zero the device reverts back to normal
2335  *      filtering operation. A negative @inc value is used to drop the counter
2336  *      when releasing a resource needing all multicasts.
2337  */
2338
2339 void dev_set_allmulti(struct net_device *dev, int inc)
2340 {
2341         unsigned short old_flags = dev->flags;
2342
2343         dev->flags |= IFF_ALLMULTI;
2344         if ((dev->allmulti += inc) == 0)
2345                 dev->flags &= ~IFF_ALLMULTI;
2346         if (dev->flags ^ old_flags)
2347                 dev_mc_upload(dev);
2348 }
2349
2350 unsigned dev_get_flags(const struct net_device *dev)
2351 {
2352         unsigned flags;
2353
2354         flags = (dev->flags & ~(IFF_PROMISC |
2355                                 IFF_ALLMULTI |
2356                                 IFF_RUNNING)) |
2357                 (dev->gflags & (IFF_PROMISC |
2358                                 IFF_ALLMULTI));
2359
2360         if (netif_running(dev) && netif_carrier_ok(dev))
2361                 flags |= IFF_RUNNING;
2362
2363         return flags;
2364 }
2365
2366 int dev_change_flags(struct net_device *dev, unsigned flags)
2367 {
2368         int ret;
2369         int old_flags = dev->flags;
2370
2371         /*
2372          *      Set the flags on our device.
2373          */
2374
2375         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2376                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2377                                IFF_AUTOMEDIA)) |
2378                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2379                                     IFF_ALLMULTI));
2380
2381         /*
2382          *      Load in the correct multicast list now the flags have changed.
2383          */
2384
2385         dev_mc_upload(dev);
2386
2387         /*
2388          *      Have we downed the interface. We handle IFF_UP ourselves
2389          *      according to user attempts to set it, rather than blindly
2390          *      setting it.
2391          */
2392
2393         ret = 0;
2394         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2395                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2396
2397                 if (!ret)
2398                         dev_mc_upload(dev);
2399         }
2400
2401         if (dev->flags & IFF_UP &&
2402             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2403                                           IFF_VOLATILE)))
2404                 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2405
2406         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2407                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2408                 dev->gflags ^= IFF_PROMISC;
2409                 dev_set_promiscuity(dev, inc);
2410         }
2411
2412         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2413            is important. Some (broken) drivers set IFF_PROMISC, when
2414            IFF_ALLMULTI is requested not asking us and not reporting.
2415          */
2416         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2417                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2418                 dev->gflags ^= IFF_ALLMULTI;
2419                 dev_set_allmulti(dev, inc);
2420         }
2421
2422         if (old_flags ^ dev->flags)
2423                 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2424
2425         return ret;
2426 }
2427
2428 int dev_set_mtu(struct net_device *dev, int new_mtu)
2429 {
2430         int err;
2431
2432         if (new_mtu == dev->mtu)
2433                 return 0;
2434
2435         /*      MTU must be positive.    */
2436         if (new_mtu < 0)
2437                 return -EINVAL;
2438
2439         if (!netif_device_present(dev))
2440                 return -ENODEV;
2441
2442         err = 0;
2443         if (dev->change_mtu)
2444                 err = dev->change_mtu(dev, new_mtu);
2445         else
2446                 dev->mtu = new_mtu;
2447         if (!err && dev->flags & IFF_UP)
2448                 notifier_call_chain(&netdev_chain,
2449                                     NETDEV_CHANGEMTU, dev);
2450         return err;
2451 }
2452
2453
2454 /*
2455  *      Perform the SIOCxIFxxx calls.
2456  */
2457 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2458 {
2459         int err;
2460         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2461
2462         if (!dev)
2463                 return -ENODEV;
2464
2465         switch (cmd) {
2466                 case SIOCGIFFLAGS:      /* Get interface flags */
2467                         ifr->ifr_flags = dev_get_flags(dev);
2468                         return 0;
2469
2470                 case SIOCSIFFLAGS:      /* Set interface flags */
2471                         return dev_change_flags(dev, ifr->ifr_flags);
2472
2473                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2474                                            (currently unused) */
2475                         ifr->ifr_metric = 0;
2476                         return 0;
2477
2478                 case SIOCSIFMETRIC:     /* Set the metric on the interface
2479                                            (currently unused) */
2480                         return -EOPNOTSUPP;
2481
2482                 case SIOCGIFMTU:        /* Get the MTU of a device */
2483                         ifr->ifr_mtu = dev->mtu;
2484                         return 0;
2485
2486                 case SIOCSIFMTU:        /* Set the MTU of a device */
2487                         return dev_set_mtu(dev, ifr->ifr_mtu);
2488
2489                 case SIOCGIFHWADDR:
2490                         memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2491                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2492                         ifr->ifr_hwaddr.sa_family = dev->type;
2493                         return 0;
2494
2495                 case SIOCSIFHWADDR:
2496                         if (!dev->set_mac_address)
2497                                 return -EOPNOTSUPP;
2498                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2499                                 return -EINVAL;
2500                         if (!netif_device_present(dev))
2501                                 return -ENODEV;
2502                         err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
2503                         if (!err)
2504                                 notifier_call_chain(&netdev_chain,
2505                                                     NETDEV_CHANGEADDR, dev);
2506                         return err;
2507
2508                 case SIOCSIFHWBROADCAST:
2509                         if (ifr->ifr_hwaddr.sa_family != dev->type)
2510                                 return -EINVAL;
2511                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2512                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2513                         notifier_call_chain(&netdev_chain,
2514                                             NETDEV_CHANGEADDR, dev);
2515                         return 0;
2516
2517                 case SIOCGIFMAP:
2518                         ifr->ifr_map.mem_start = dev->mem_start;
2519                         ifr->ifr_map.mem_end   = dev->mem_end;
2520                         ifr->ifr_map.base_addr = dev->base_addr;
2521                         ifr->ifr_map.irq       = dev->irq;
2522                         ifr->ifr_map.dma       = dev->dma;
2523                         ifr->ifr_map.port      = dev->if_port;
2524                         return 0;
2525
2526                 case SIOCSIFMAP:
2527                         if (dev->set_config) {
2528                                 if (!netif_device_present(dev))
2529                                         return -ENODEV;
2530                                 return dev->set_config(dev, &ifr->ifr_map);
2531                         }
2532                         return -EOPNOTSUPP;
2533
2534                 case SIOCADDMULTI:
2535                         if (!dev->set_multicast_list ||
2536                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2537                                 return -EINVAL;
2538                         if (!netif_device_present(dev))
2539                                 return -ENODEV;
2540                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2541                                           dev->addr_len, 1);
2542
2543                 case SIOCDELMULTI:
2544                         if (!dev->set_multicast_list ||
2545                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2546                                 return -EINVAL;
2547                         if (!netif_device_present(dev))
2548                                 return -ENODEV;
2549                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2550                                              dev->addr_len, 1);
2551
2552                 case SIOCGIFINDEX:
2553                         ifr->ifr_ifindex = dev->ifindex;
2554                         return 0;
2555
2556                 case SIOCGIFTXQLEN:
2557                         ifr->ifr_qlen = dev->tx_queue_len;
2558                         return 0;
2559
2560                 case SIOCSIFTXQLEN:
2561                         if (ifr->ifr_qlen < 0)
2562                                 return -EINVAL;
2563                         dev->tx_queue_len = ifr->ifr_qlen;
2564                         return 0;
2565
2566                 case SIOCSIFNAME:
2567                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2568                         return dev_change_name(dev, ifr->ifr_newname);
2569
2570                 /*
2571                  *      Unknown or private ioctl
2572                  */
2573
2574                 default:
2575                         if ((cmd >= SIOCDEVPRIVATE &&
2576                             cmd <= SIOCDEVPRIVATE + 15) ||
2577                             cmd == SIOCBONDENSLAVE ||
2578                             cmd == SIOCBONDRELEASE ||
2579                             cmd == SIOCBONDSETHWADDR ||
2580                             cmd == SIOCBONDSLAVEINFOQUERY ||
2581                             cmd == SIOCBONDINFOQUERY ||
2582                             cmd == SIOCBONDCHANGEACTIVE ||
2583                             cmd == SIOCGMIIPHY ||
2584                             cmd == SIOCGMIIREG ||
2585                             cmd == SIOCSMIIREG ||
2586                             cmd == SIOCBRADDIF ||
2587                             cmd == SIOCBRDELIF ||
2588                             cmd == SIOCWANDEV) {
2589                                 err = -EOPNOTSUPP;
2590                                 if (dev->do_ioctl) {
2591                                         if (netif_device_present(dev))
2592                                                 err = dev->do_ioctl(dev, ifr,
2593                                                                     cmd);
2594                                         else
2595                                                 err = -ENODEV;
2596                                 }
2597                         } else
2598                                 err = -EINVAL;
2599
2600         }
2601         return err;
2602 }
2603
2604 /*
2605  *      This function handles all "interface"-type I/O control requests. The actual
2606  *      'doing' part of this is dev_ifsioc above.
2607  */
2608
2609 /**
2610  *      dev_ioctl       -       network device ioctl
2611  *      @cmd: command to issue
2612  *      @arg: pointer to a struct ifreq in user space
2613  *
2614  *      Issue ioctl functions to devices. This is normally called by the
2615  *      user space syscall interfaces but can sometimes be useful for
2616  *      other purposes. The return value is the return from the syscall if
2617  *      positive or a negative errno code on error.
2618  */
2619
2620 int dev_ioctl(unsigned int cmd, void __user *arg)
2621 {
2622         struct ifreq ifr;
2623         int ret;
2624         char *colon;
2625
2626         /* One special case: SIOCGIFCONF takes ifconf argument
2627            and requires shared lock, because it sleeps writing
2628            to user space.
2629          */
2630
2631         if (cmd == SIOCGIFCONF) {
2632                 rtnl_shlock();
2633                 ret = dev_ifconf((char __user *) arg);
2634                 rtnl_shunlock();
2635                 return ret;
2636         }
2637         if (cmd == SIOCGIFNAME)
2638                 return dev_ifname((struct ifreq __user *)arg);
2639
2640         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2641                 return -EFAULT;
2642
2643         ifr.ifr_name[IFNAMSIZ-1] = 0;
2644
2645         colon = strchr(ifr.ifr_name, ':');
2646         if (colon)
2647                 *colon = 0;
2648
2649         /*
2650          *      See which interface the caller is talking about.
2651          */
2652
2653         switch (cmd) {
2654                 /*
2655                  *      These ioctl calls:
2656                  *      - can be done by all.
2657                  *      - atomic and do not require locking.
2658                  *      - return a value
2659                  */
2660                 case SIOCGIFFLAGS:
2661                 case SIOCGIFMETRIC:
2662                 case SIOCGIFMTU:
2663                 case SIOCGIFHWADDR:
2664                 case SIOCGIFSLAVE:
2665                 case SIOCGIFMAP:
2666                 case SIOCGIFINDEX:
2667                 case SIOCGIFTXQLEN:
2668                         dev_load(ifr.ifr_name);
2669                         read_lock(&dev_base_lock);
2670                         ret = dev_ifsioc(&ifr, cmd);
2671                         read_unlock(&dev_base_lock);
2672                         if (!ret) {
2673                                 if (colon)
2674                                         *colon = ':';
2675                                 if (copy_to_user(arg, &ifr,
2676                                                  sizeof(struct ifreq)))
2677                                         ret = -EFAULT;
2678                         }
2679                         return ret;
2680
2681                 case SIOCETHTOOL:
2682                         dev_load(ifr.ifr_name);
2683                         rtnl_lock();
2684                         ret = dev_ethtool(&ifr);
2685                         rtnl_unlock();
2686                         if (!ret) {
2687                                 if (colon)
2688                                         *colon = ':';
2689                                 if (copy_to_user(arg, &ifr,
2690                                                  sizeof(struct ifreq)))
2691                                         ret = -EFAULT;
2692                         }
2693                         return ret;
2694
2695                 /*
2696                  *      These ioctl calls:
2697                  *      - require superuser power.
2698                  *      - require strict serialization.
2699                  *      - return a value
2700                  */
2701                 case SIOCGMIIPHY:
2702                 case SIOCGMIIREG:
2703                 case SIOCSIFNAME:
2704                         if (!capable(CAP_NET_ADMIN))
2705                                 return -EPERM;
2706                         dev_load(ifr.ifr_name);
2707                         rtnl_lock();
2708                         ret = dev_ifsioc(&ifr, cmd);
2709                         rtnl_unlock();
2710                         if (!ret) {
2711                                 if (colon)
2712                                         *colon = ':';
2713                                 if (copy_to_user(arg, &ifr,
2714                                                  sizeof(struct ifreq)))
2715                                         ret = -EFAULT;
2716                         }
2717                         return ret;
2718
2719                 /*
2720                  *      These ioctl calls:
2721                  *      - require superuser power.
2722                  *      - require strict serialization.
2723                  *      - do not return a value
2724                  */
2725                 case SIOCSIFFLAGS:
2726                 case SIOCSIFMETRIC:
2727                 case SIOCSIFMTU:
2728                 case SIOCSIFMAP:
2729                 case SIOCSIFHWADDR:
2730                 case SIOCSIFSLAVE:
2731                 case SIOCADDMULTI:
2732                 case SIOCDELMULTI:
2733                 case SIOCSIFHWBROADCAST:
2734                 case SIOCSIFTXQLEN:
2735                 case SIOCSMIIREG:
2736                 case SIOCBONDENSLAVE:
2737                 case SIOCBONDRELEASE:
2738                 case SIOCBONDSETHWADDR:
2739                 case SIOCBONDSLAVEINFOQUERY:
2740                 case SIOCBONDINFOQUERY:
2741                 case SIOCBONDCHANGEACTIVE:
2742                 case SIOCBRADDIF:
2743                 case SIOCBRDELIF:
2744                         if (!capable(CAP_NET_ADMIN))
2745                                 return -EPERM;
2746                         dev_load(ifr.ifr_name);
2747                         rtnl_lock();
2748                         ret = dev_ifsioc(&ifr, cmd);
2749                         rtnl_unlock();
2750                         return ret;
2751
2752                 case SIOCGIFMEM:
2753                         /* Get the per device memory space. We can add this but
2754                          * currently do not support it */
2755                 case SIOCSIFMEM:
2756                         /* Set the per device memory buffer space.
2757                          * Not applicable in our case */
2758                 case SIOCSIFLINK:
2759                         return -EINVAL;
2760
2761                 /*
2762                  *      Unknown or private ioctl.
2763                  */
2764                 default:
2765                         if (cmd == SIOCWANDEV ||
2766                             (cmd >= SIOCDEVPRIVATE &&
2767                              cmd <= SIOCDEVPRIVATE + 15)) {
2768                                 dev_load(ifr.ifr_name);
2769                                 rtnl_lock();
2770                                 ret = dev_ifsioc(&ifr, cmd);
2771                                 rtnl_unlock();
2772                                 if (!ret && copy_to_user(arg, &ifr,
2773                                                          sizeof(struct ifreq)))
2774                                         ret = -EFAULT;
2775                                 return ret;
2776                         }
2777 #ifdef WIRELESS_EXT
2778                         /* Take care of Wireless Extensions */
2779                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2780                                 /* If command is `set a parameter', or
2781                                  * `get the encoding parameters', check if
2782                                  * the user has the right to do it */
2783                                 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2784                                         if (!capable(CAP_NET_ADMIN))
2785                                                 return -EPERM;
2786                                 }
2787                                 dev_load(ifr.ifr_name);
2788                                 rtnl_lock();
2789                                 /* Follow me in net/core/wireless.c */
2790                                 ret = wireless_process_ioctl(&ifr, cmd);
2791                                 rtnl_unlock();
2792                                 if (!ret && IW_IS_GET(cmd) &&
2793                                     copy_to_user(arg, &ifr,
2794                                                  sizeof(struct ifreq)))
2795                                         ret = -EFAULT;
2796                                 return ret;
2797                         }
2798 #endif  /* WIRELESS_EXT */
2799                         return -EINVAL;
2800         }
2801 }
2802
2803
2804 /**
2805  *      dev_new_index   -       allocate an ifindex
2806  *
2807  *      Returns a suitable unique value for a new device interface
2808  *      number.  The caller must hold the rtnl semaphore or the
2809  *      dev_base_lock to be sure it remains unique.
2810  */
2811 int dev_new_index(void)
2812 {
2813         static int ifindex;
2814         for (;;) {
2815                 if (++ifindex <= 0)
2816                         ifindex = 1;
2817                 if (!__dev_get_by_index(ifindex))
2818                         return ifindex;
2819         }
2820 }
2821
2822 static int dev_boot_phase = 1;
2823
2824 /* Delayed registration/unregisteration */
2825 static spinlock_t net_todo_list_lock = SPIN_LOCK_UNLOCKED;
2826 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2827
2828 static inline void net_set_todo(struct net_device *dev)
2829 {
2830         spin_lock(&net_todo_list_lock);
2831         list_add_tail(&dev->todo_list, &net_todo_list);
2832         spin_unlock(&net_todo_list_lock);
2833 }
2834
2835 /**
2836  *      register_netdevice      - register a network device
2837  *      @dev: device to register
2838  *
2839  *      Take a completed network device structure and add it to the kernel
2840  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2841  *      chain. 0 is returned on success. A negative errno code is returned
2842  *      on a failure to set up the device, or if the name is a duplicate.
2843  *
2844  *      Callers must hold the rtnl semaphore.  See the comment at the
2845  *      end of Space.c for details about the locking.  You may want
2846  *      register_netdev() instead of this.
2847  *
2848  *      BUGS:
2849  *      The locking appears insufficient to guarantee two parallel registers
2850  *      will not get the same name.
2851  */
2852
2853 int register_netdevice(struct net_device *dev)
2854 {
2855         struct hlist_head *head;
2856         struct hlist_node *p;
2857         int ret;
2858
2859         BUG_ON(dev_boot_phase);
2860         ASSERT_RTNL();
2861
2862         /* When net_device's are persistent, this will be fatal. */
2863         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2864
2865         spin_lock_init(&dev->queue_lock);
2866         spin_lock_init(&dev->xmit_lock);
2867         dev->xmit_lock_owner = -1;
2868 #ifdef CONFIG_NET_CLS_ACT
2869         spin_lock_init(&dev->ingress_lock);
2870 #endif
2871
2872         ret = alloc_divert_blk(dev);
2873         if (ret)
2874                 goto out;
2875
2876         dev->iflink = -1;
2877
2878         /* Init, if this function is available */
2879         if (dev->init) {
2880                 ret = dev->init(dev);
2881                 if (ret) {
2882                         if (ret > 0)
2883                                 ret = -EIO;
2884                         goto out_err;
2885                 }
2886         }
2887
2888         if (!dev_valid_name(dev->name)) {
2889                 ret = -EINVAL;
2890                 goto out_err;
2891         }
2892
2893         dev->ifindex = dev_new_index();
2894         if (dev->iflink == -1)
2895                 dev->iflink = dev->ifindex;
2896
2897         /* Check for existence of name */
2898         head = dev_name_hash(dev->name);
2899         hlist_for_each(p, head) {
2900                 struct net_device *d
2901                         = hlist_entry(p, struct net_device, name_hlist);
2902                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2903                         ret = -EEXIST;
2904                         goto out_err;
2905                 }
2906         }
2907
2908         /* Fix illegal SG+CSUM combinations. */
2909         if ((dev->features & NETIF_F_SG) &&
2910             !(dev->features & (NETIF_F_IP_CSUM |
2911                                NETIF_F_NO_CSUM |
2912                                NETIF_F_HW_CSUM))) {
2913                 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2914                        dev->name);
2915                 dev->features &= ~NETIF_F_SG;
2916         }
2917
2918         /*
2919          *      nil rebuild_header routine,
2920          *      that should be never called and used as just bug trap.
2921          */
2922
2923         if (!dev->rebuild_header)
2924                 dev->rebuild_header = default_rebuild_header;
2925
2926         /*
2927          *      Default initial state at registry is that the
2928          *      device is present.
2929          */
2930
2931         set_bit(__LINK_STATE_PRESENT, &dev->state);
2932
2933         dev->next = NULL;
2934         dev_init_scheduler(dev);
2935         write_lock_bh(&dev_base_lock);
2936         *dev_tail = dev;
2937         dev_tail = &dev->next;
2938         hlist_add_head(&dev->name_hlist, head);
2939         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2940         dev_hold(dev);
2941         dev->reg_state = NETREG_REGISTERING;
2942         write_unlock_bh(&dev_base_lock);
2943
2944         /* Notify protocols, that a new device appeared. */
2945         notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2946
2947         /* Finish registration after unlock */
2948         net_set_todo(dev);
2949         ret = 0;
2950
2951 out:
2952         return ret;
2953 out_err:
2954         free_divert_blk(dev);
2955         goto out;
2956 }
2957
2958 /*
2959  * netdev_wait_allrefs - wait until all references are gone.
2960  *
2961  * This is called when unregistering network devices.
2962  *
2963  * Any protocol or device that holds a reference should register
2964  * for netdevice notification, and cleanup and put back the
2965  * reference if they receive an UNREGISTER event.
2966  * We can get stuck here if buggy protocols don't correctly
2967  * call dev_put.
2968  */
2969 static void netdev_wait_allrefs(struct net_device *dev)
2970 {
2971         unsigned long rebroadcast_time, warning_time;
2972
2973         rebroadcast_time = warning_time = jiffies;
2974         while (atomic_read(&dev->refcnt) != 0) {
2975                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2976                         rtnl_shlock();
2977
2978                         /* Rebroadcast unregister notification */
2979                         notifier_call_chain(&netdev_chain,
2980                                             NETDEV_UNREGISTER, dev);
2981
2982                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2983                                      &dev->state)) {
2984                                 /* We must not have linkwatch events
2985                                  * pending on unregister. If this
2986                                  * happens, we simply run the queue
2987                                  * unscheduled, resulting in a noop
2988                                  * for this device.
2989                                  */
2990                                 linkwatch_run_queue();
2991                         }
2992
2993                         rtnl_shunlock();
2994
2995                         rebroadcast_time = jiffies;
2996                 }
2997
2998                 current->state = TASK_INTERRUPTIBLE;
2999                 schedule_timeout(HZ / 4);
3000
3001                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3002                         printk(KERN_EMERG "unregister_netdevice: "
3003                                "waiting for %s to become free. Usage "
3004                                "count = %d\n",
3005                                dev->name, atomic_read(&dev->refcnt));
3006                         warning_time = jiffies;
3007                 }
3008         }
3009 }
3010
3011 /* The sequence is:
3012  *
3013  *      rtnl_lock();
3014  *      ...
3015  *      register_netdevice(x1);
3016  *      register_netdevice(x2);
3017  *      ...
3018  *      unregister_netdevice(y1);
3019  *      unregister_netdevice(y2);
3020  *      ...
3021  *      rtnl_unlock();
3022  *      free_netdev(y1);
3023  *      free_netdev(y2);
3024  *
3025  * We are invoked by rtnl_unlock() after it drops the semaphore.
3026  * This allows us to deal with problems:
3027  * 1) We can create/delete sysfs objects which invoke hotplug
3028  *    without deadlocking with linkwatch via keventd.
3029  * 2) Since we run with the RTNL semaphore not held, we can sleep
3030  *    safely in order to wait for the netdev refcnt to drop to zero.
3031  */
3032 static DECLARE_MUTEX(net_todo_run_mutex);
3033 void netdev_run_todo(void)
3034 {
3035         struct list_head list = LIST_HEAD_INIT(list);
3036         int err;
3037
3038
3039         /* Need to guard against multiple cpu's getting out of order. */
3040         down(&net_todo_run_mutex);
3041
3042         /* Not safe to do outside the semaphore.  We must not return
3043          * until all unregister events invoked by the local processor
3044          * have been completed (either by this todo run, or one on
3045          * another cpu).
3046          */
3047         if (list_empty(&net_todo_list))
3048                 goto out;
3049
3050         /* Snapshot list, allow later requests */
3051         spin_lock(&net_todo_list_lock);
3052         list_splice_init(&net_todo_list, &list);
3053         spin_unlock(&net_todo_list_lock);
3054
3055         while (!list_empty(&list)) {
3056                 struct net_device *dev
3057                         = list_entry(list.next, struct net_device, todo_list);
3058                 list_del(&dev->todo_list);
3059
3060                 switch(dev->reg_state) {
3061                 case NETREG_REGISTERING:
3062                         err = netdev_register_sysfs(dev);
3063                         if (err)
3064                                 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
3065                                        dev->name, err);
3066                         dev->reg_state = NETREG_REGISTERED;
3067                         break;
3068
3069                 case NETREG_UNREGISTERING:
3070                         netdev_unregister_sysfs(dev);
3071                         dev->reg_state = NETREG_UNREGISTERED;
3072
3073                         netdev_wait_allrefs(dev);
3074
3075                         /* paranoia */
3076                         BUG_ON(atomic_read(&dev->refcnt));
3077                         BUG_TRAP(!dev->ip_ptr);
3078                         BUG_TRAP(!dev->ip6_ptr);
3079                         BUG_TRAP(!dev->dn_ptr);
3080
3081
3082                         /* It must be the very last action,
3083                          * after this 'dev' may point to freed up memory.
3084                          */
3085                         if (dev->destructor)
3086                                 dev->destructor(dev);
3087                         break;
3088
3089                 default:
3090                         printk(KERN_ERR "network todo '%s' but state %d\n",
3091                                dev->name, dev->reg_state);
3092                         break;
3093                 }
3094         }
3095
3096 out:
3097         up(&net_todo_run_mutex);
3098 }
3099
3100 /**
3101  *      free_netdev - free network device
3102  *      @dev: device
3103  *
3104  *      This function does the last stage of destroying an allocated device
3105  *      interface. The reference to the device object is released.
3106  *      If this is the last reference then it will be freed.
3107  */
3108 void free_netdev(struct net_device *dev)
3109 {
3110 #ifdef CONFIG_SYSFS
3111         /*  Compatiablity with error handling in drivers */
3112         if (dev->reg_state == NETREG_UNINITIALIZED) {
3113                 kfree((char *)dev - dev->padded);
3114                 return;
3115         }
3116
3117         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3118         dev->reg_state = NETREG_RELEASED;
3119
3120         /* will free via class release */
3121         class_device_put(&dev->class_dev);
3122 #else
3123         kfree((char *)dev - dev->padded);
3124 #endif
3125 }
3126
3127 /* Synchronize with packet receive processing. */
3128 void synchronize_net(void)
3129 {
3130         might_sleep();
3131         synchronize_kernel();
3132 }
3133
3134 /**
3135  *      unregister_netdevice - remove device from the kernel
3136  *      @dev: device
3137  *
3138  *      This function shuts down a device interface and removes it
3139  *      from the kernel tables. On success 0 is returned, on a failure
3140  *      a negative errno code is returned.
3141  *
3142  *      Callers must hold the rtnl semaphore.  See the comment at the
3143  *      end of Space.c for details about the locking.  You may want
3144  *      unregister_netdev() instead of this.
3145  */
3146
3147 int unregister_netdevice(struct net_device *dev)
3148 {
3149         struct net_device *d, **dp;
3150
3151         BUG_ON(dev_boot_phase);
3152         ASSERT_RTNL();
3153
3154         /* Some devices call without registering for initialization unwind. */
3155         if (dev->reg_state == NETREG_UNINITIALIZED) {
3156                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3157                                   "was registered\n", dev->name, dev);
3158                 return -ENODEV;
3159         }
3160
3161         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3162
3163         /* If device is running, close it first. */
3164         if (dev->flags & IFF_UP)
3165                 dev_close(dev);
3166
3167         /* And unlink it from device chain. */
3168         for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3169                 if (d == dev) {
3170                         write_lock_bh(&dev_base_lock);
3171                         hlist_del(&dev->name_hlist);
3172                         hlist_del(&dev->index_hlist);
3173                         if (dev_tail == &dev->next)
3174                                 dev_tail = dp;
3175                         *dp = d->next;
3176                         write_unlock_bh(&dev_base_lock);
3177                         break;
3178                 }
3179         }
3180         if (!d) {
3181                 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3182                        dev->name);
3183                 return -ENODEV;
3184         }
3185
3186         dev->reg_state = NETREG_UNREGISTERING;
3187
3188         synchronize_net();
3189
3190         /* Shutdown queueing discipline. */
3191         dev_shutdown(dev);
3192
3193
3194         /* Notify protocols, that we are about to destroy
3195            this device. They should clean all the things.
3196         */
3197         notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3198
3199         /*
3200          *      Flush the multicast chain
3201          */
3202         dev_mc_discard(dev);
3203
3204         if (dev->uninit)
3205                 dev->uninit(dev);
3206
3207         /* Notifier chain MUST detach us from master device. */
3208         BUG_TRAP(!dev->master);
3209
3210         free_divert_blk(dev);
3211
3212         /* Finish processing unregister after unlock */
3213         net_set_todo(dev);
3214
3215         synchronize_net();
3216
3217         dev_put(dev);
3218         return 0;
3219 }
3220
3221 #ifdef CONFIG_HOTPLUG_CPU
3222 static int dev_cpu_callback(struct notifier_block *nfb,
3223                             unsigned long action,
3224                             void *ocpu)
3225 {
3226         struct sk_buff **list_skb;
3227         struct net_device **list_net;
3228         struct sk_buff *skb;
3229         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3230         struct softnet_data *sd, *oldsd;
3231
3232         if (action != CPU_DEAD)
3233                 return NOTIFY_OK;
3234
3235         local_irq_disable();
3236         cpu = smp_processor_id();
3237         sd = &per_cpu(softnet_data, cpu);
3238         oldsd = &per_cpu(softnet_data, oldcpu);
3239
3240         /* Find end of our completion_queue. */
3241         list_skb = &sd->completion_queue;
3242         while (*list_skb)
3243                 list_skb = &(*list_skb)->next;
3244         /* Append completion queue from offline CPU. */
3245         *list_skb = oldsd->completion_queue;
3246         oldsd->completion_queue = NULL;
3247
3248         /* Find end of our output_queue. */
3249         list_net = &sd->output_queue;
3250         while (*list_net)
3251                 list_net = &(*list_net)->next_sched;
3252         /* Append output queue from offline CPU. */
3253         *list_net = oldsd->output_queue;
3254         oldsd->output_queue = NULL;
3255
3256         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3257         local_irq_enable();
3258
3259         /* Process offline CPU's input_pkt_queue */
3260         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3261                 netif_rx(skb);
3262
3263         return NOTIFY_OK;
3264 }
3265 #endif /* CONFIG_HOTPLUG_CPU */
3266
3267
3268 /*
3269  *      Initialize the DEV module. At boot time this walks the device list and
3270  *      unhooks any devices that fail to initialise (normally hardware not
3271  *      present) and leaves us with a valid list of present and active devices.
3272  *
3273  */
3274
3275 /*
3276  *       This is called single threaded during boot, so no need
3277  *       to take the rtnl semaphore.
3278  */
3279 static int __init net_dev_init(void)
3280 {
3281         int i, rc = -ENOMEM;
3282
3283         BUG_ON(!dev_boot_phase);
3284
3285         if (dev_proc_init())
3286                 goto out;
3287
3288         if (netdev_sysfs_init())
3289                 goto out;
3290
3291         INIT_LIST_HEAD(&ptype_all);
3292         for (i = 0; i < 16; i++)
3293                 INIT_LIST_HEAD(&ptype_base[i]);
3294
3295         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3296                 INIT_HLIST_HEAD(&dev_name_head[i]);
3297
3298         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3299                 INIT_HLIST_HEAD(&dev_index_head[i]);
3300
3301         /*
3302          *      Initialise the packet receive queues.
3303          */
3304
3305         for (i = 0; i < NR_CPUS; i++) {
3306                 struct softnet_data *queue;
3307
3308                 queue = &per_cpu(softnet_data, i);
3309                 skb_queue_head_init(&queue->input_pkt_queue);
3310                 queue->throttle = 0;
3311                 queue->cng_level = 0;
3312                 queue->avg_blog = 10; /* arbitrary non-zero */
3313                 queue->completion_queue = NULL;
3314                 INIT_LIST_HEAD(&queue->poll_list);
3315                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3316                 queue->backlog_dev.weight = weight_p;
3317                 queue->backlog_dev.poll = process_backlog;
3318                 atomic_set(&queue->backlog_dev.refcnt, 1);
3319         }
3320
3321 #ifdef OFFLINE_SAMPLE
3322         samp_timer.expires = jiffies + (10 * HZ);
3323         add_timer(&samp_timer);
3324 #endif
3325
3326         dev_boot_phase = 0;
3327
3328         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3329         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3330
3331         hotcpu_notifier(dev_cpu_callback, 0);
3332         dst_init();
3333         dev_mcast_init();
3334         rc = 0;
3335 out:
3336         return rc;
3337 }
3338
3339 subsys_initcall(net_dev_init);
3340
3341 EXPORT_SYMBOL(__dev_get);
3342 EXPORT_SYMBOL(__dev_get_by_flags);
3343 EXPORT_SYMBOL(__dev_get_by_index);
3344 EXPORT_SYMBOL(__dev_get_by_name);
3345 EXPORT_SYMBOL(__dev_remove_pack);
3346 EXPORT_SYMBOL(__skb_linearize);
3347 EXPORT_SYMBOL(call_netdevice_notifiers);
3348 EXPORT_SYMBOL(dev_add_pack);
3349 EXPORT_SYMBOL(dev_alloc_name);
3350 EXPORT_SYMBOL(dev_close);
3351 EXPORT_SYMBOL(dev_get_by_flags);
3352 EXPORT_SYMBOL(dev_get_by_index);
3353 EXPORT_SYMBOL(dev_get_by_name);
3354 EXPORT_SYMBOL(dev_getbyhwaddr);
3355 EXPORT_SYMBOL(dev_ioctl);
3356 EXPORT_SYMBOL(dev_new_index);
3357 EXPORT_SYMBOL(dev_open);
3358 EXPORT_SYMBOL(dev_queue_xmit);
3359 EXPORT_SYMBOL(dev_queue_xmit_nit);
3360 EXPORT_SYMBOL(dev_remove_pack);
3361 EXPORT_SYMBOL(dev_set_allmulti);
3362 EXPORT_SYMBOL(dev_set_promiscuity);
3363 EXPORT_SYMBOL(dev_change_flags);
3364 EXPORT_SYMBOL(dev_set_mtu);
3365 EXPORT_SYMBOL(free_netdev);
3366 EXPORT_SYMBOL(netdev_boot_setup_check);
3367 EXPORT_SYMBOL(netdev_set_master);
3368 EXPORT_SYMBOL(netdev_state_change);
3369 EXPORT_SYMBOL(netif_receive_skb);
3370 EXPORT_SYMBOL(netif_rx);
3371 EXPORT_SYMBOL(register_gifconf);
3372 EXPORT_SYMBOL(register_netdevice);
3373 EXPORT_SYMBOL(register_netdevice_notifier);
3374 EXPORT_SYMBOL(skb_checksum_help);
3375 EXPORT_SYMBOL(synchronize_net);
3376 EXPORT_SYMBOL(unregister_netdevice);
3377 EXPORT_SYMBOL(unregister_netdevice_notifier);
3378
3379 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3380 EXPORT_SYMBOL(br_handle_frame_hook);
3381 #endif
3382
3383 #ifdef CONFIG_KMOD
3384 EXPORT_SYMBOL(dev_load);
3385 #endif
3386 #ifdef CONFIG_NET_HW_FLOWCONTROL
3387 EXPORT_SYMBOL(netdev_dropping);
3388 EXPORT_SYMBOL(netdev_fc_xoff);
3389 EXPORT_SYMBOL(netdev_register_fc);
3390 EXPORT_SYMBOL(netdev_unregister_fc);
3391 #endif
3392
3393 #ifdef CONFIG_NET_CLS_ACT
3394 EXPORT_SYMBOL(ing_filter);
3395 #endif
3396
3397
3398 EXPORT_PER_CPU_SYMBOL(softnet_data);