X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fnet%2Fe1000%2Fe1000_main.c;h=c6259c7127f6bb9aaa4dbda53ee783ed428c1e0e;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=fa29402602169a602d27381f5d98e261f9b4d470;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index fa2940260..c6259c712 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1,100 +1,33 @@ /******************************************************************************* - - Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + Intel PRO/1000 Linux driver + Copyright(c) 1999 - 2006 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place - Suite 330, Boston, MA 02111-1307, USA. - - The full GNU General Public License is included in this distribution in the - file called LICENSE. - + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + Contact Information: Linux NICS + e1000-devel Mailing List Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ #include "e1000.h" - -/* Change Log - * 6.3.9 12/16/2005 - * o incorporate fix for recycled skbs from IBM LTC - * 6.3.7 11/18/2005 - * o Honor eeprom setting for enabling/disabling Wake On Lan - * 6.3.5 11/17/2005 - * o Fix memory leak in rx ring handling for PCI Express adapters - * 6.3.4 11/8/05 - * o Patch from Jesper Juhl to remove redundant NULL checks for kfree - * 6.3.2 9/20/05 - * o Render logic that sets/resets DRV_LOAD as inline functions to - * avoid code replication. If f/w is AMT then set DRV_LOAD only when - * network interface is open. - * o Handle DRV_LOAD set/reset in cases where AMT uses VLANs. 
- * o Adjust PBA partioning for Jumbo frames using MTU size and not - * rx_buffer_len - * 6.3.1 9/19/05 - * o Use adapter->tx_timeout_factor in Tx Hung Detect logic - (e1000_clean_tx_irq) - * o Support for 8086:10B5 device (Quad Port) - * 6.2.14 9/15/05 - * o In AMT enabled configurations, set/reset DRV_LOAD bit on interface - * open/close - * 6.2.13 9/14/05 - * o Invoke e1000_check_mng_mode only for 8257x controllers since it - * accesses the FWSM that is not supported in other controllers - * 6.2.12 9/9/05 - * o Add support for device id E1000_DEV_ID_82546GB_QUAD_COPPER - * o set RCTL:SECRC only for controllers newer than 82543. - * o When the n/w interface comes down reset DRV_LOAD bit to notify f/w. - * This code was moved from e1000_remove to e1000_close - * 6.2.10 9/6/05 - * o Fix error in updating RDT in el1000_alloc_rx_buffers[_ps] -- one off. - * o Enable fc by default on 82573 controllers (do not read eeprom) - * o Fix rx_errors statistic not to include missed_packet_count - * o Fix rx_dropped statistic not to include missed_packet_count - (Padraig Brady) - * 6.2.9 8/30/05 - * o Remove call to update statistics from the controller ib e1000_get_stats - * 6.2.8 8/30/05 - * o Improved algorithm for rx buffer allocation/rdt update - * o Flow control watermarks relative to rx PBA size - * o Simplified 'Tx Hung' detect logic - * 6.2.7 8/17/05 - * o Report rx buffer allocation failures and tx timeout counts in stats - * 6.2.6 8/16/05 - * o Implement workaround for controller erratum -- linear non-tso packet - * following a TSO gets written back prematurely - * 6.2.5 8/15/05 - * o Set netdev->tx_queue_len based on link speed/duplex settings. - * o Fix net_stats.rx_fifo_errors - * o Do not power off PHY if SoL/IDER session is active - * 6.2.4 8/10/05 - * o Fix loopback test setup/cleanup for 82571/3 controllers - * o Fix parsing of outgoing packets (e1000_transfer_dhcp_info) to treat - * all packets as raw - * o Prevent operations that will cause the PHY to be reset if SoL/IDER - * sessions are active and log a message - * 6.2.2 7/21/05 - * o used fixed size descriptors for all MTU sizes, reduces memory load - * 6.1.2 4/13/05 - * o Fixed ethtool diagnostics - * o Enabled flow control to take default eeprom settings - * o Added stats_lock around e1000_read_phy_reg commands to avoid concurrent - * calls, one from mii_ioctl and other from within update_stats while - * processing MIIREG ioctl. 
- */
+#include <net/ip6_checksum.h>
 
 char e1000_driver_name[] = "e1000";
 static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
@@ -103,9 +36,9 @@ static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
 #else
 #define DRIVERNAPI "-NAPI"
 #endif
-#define DRV_VERSION "6.3.9-k4"DRIVERNAPI
+#define DRV_VERSION "7.3.15-k2"DRIVERNAPI
 char e1000_driver_version[] = DRV_VERSION;
-static char e1000_copyright[] = "Copyright (c) 1999-2005 Intel Corporation.";
+static char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation.";
 
 /* e1000_pci_tbl - PCI Device ID Table
  *
@@ -140,6 +73,11 @@ static struct pci_device_id e1000_pci_tbl[] = {
 	INTEL_E1000_ETHERNET_DEVICE(0x1026),
 	INTEL_E1000_ETHERNET_DEVICE(0x1027),
 	INTEL_E1000_ETHERNET_DEVICE(0x1028),
+	INTEL_E1000_ETHERNET_DEVICE(0x1049),
+	INTEL_E1000_ETHERNET_DEVICE(0x104A),
+	INTEL_E1000_ETHERNET_DEVICE(0x104B),
+	INTEL_E1000_ETHERNET_DEVICE(0x104C),
+	INTEL_E1000_ETHERNET_DEVICE(0x104D),
 	INTEL_E1000_ETHERNET_DEVICE(0x105E),
 	INTEL_E1000_ETHERNET_DEVICE(0x105F),
 	INTEL_E1000_ETHERNET_DEVICE(0x1060),
@@ -157,9 +95,18 @@ static struct pci_device_id e1000_pci_tbl[] = {
 	INTEL_E1000_ETHERNET_DEVICE(0x108A),
 	INTEL_E1000_ETHERNET_DEVICE(0x108B),
 	INTEL_E1000_ETHERNET_DEVICE(0x108C),
+	INTEL_E1000_ETHERNET_DEVICE(0x1096),
+	INTEL_E1000_ETHERNET_DEVICE(0x1098),
 	INTEL_E1000_ETHERNET_DEVICE(0x1099),
 	INTEL_E1000_ETHERNET_DEVICE(0x109A),
+	INTEL_E1000_ETHERNET_DEVICE(0x10A4),
 	INTEL_E1000_ETHERNET_DEVICE(0x10B5),
+	INTEL_E1000_ETHERNET_DEVICE(0x10B9),
+	INTEL_E1000_ETHERNET_DEVICE(0x10BA),
+	INTEL_E1000_ETHERNET_DEVICE(0x10BB),
+	INTEL_E1000_ETHERNET_DEVICE(0x10BC),
+	INTEL_E1000_ETHERNET_DEVICE(0x10C4),
+	INTEL_E1000_ETHERNET_DEVICE(0x10C5),
 	/* required last entry */
 	{0,}
 };
@@ -168,6 +115,7 @@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
 int e1000_up(struct e1000_adapter *adapter);
 void e1000_down(struct e1000_adapter *adapter);
+void e1000_reinit_locked(struct e1000_adapter *adapter);
 void e1000_reset(struct e1000_adapter *adapter);
 int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx);
 int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
@@ -175,25 +123,20 @@ int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
 void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
 void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
 static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
-                                    struct e1000_tx_ring *txdr);
+                                    struct e1000_tx_ring *txdr);
 static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
-                                    struct e1000_rx_ring *rxdr);
+                                    struct e1000_rx_ring *rxdr);
 static void e1000_free_tx_resources(struct e1000_adapter *adapter,
-                                    struct e1000_tx_ring *tx_ring);
+                                    struct e1000_tx_ring *tx_ring);
 static void e1000_free_rx_resources(struct e1000_adapter *adapter,
-                                    struct e1000_rx_ring *rx_ring);
+                                    struct e1000_rx_ring *rx_ring);
 void e1000_update_stats(struct e1000_adapter *adapter);
-/* Local Function Prototypes */
-
 static int e1000_init_module(void);
 static void e1000_exit_module(void);
 static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void __devexit e1000_remove(struct pci_dev *pdev);
 static int e1000_alloc_queues(struct e1000_adapter *adapter);
-#ifdef CONFIG_E1000_MQ
-static void e1000_setup_queue_mapping(struct e1000_adapter *adapter);
-#endif
 static int e1000_sw_init(struct e1000_adapter *adapter);
 static int e1000_open(struct net_device *netdev);
 static int e1000_close(struct net_device *netdev);
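For reference, new controllers enter the driver purely through the ID table extended above: the PCI core matches each enumerated function against e1000_pci_tbl and calls e1000_probe() on a hit. A minimal sketch of the machinery, assuming the INTEL_E1000_ETHERNET_DEVICE() helper expands as in the e1000_hw.h of this era (PCI_VENDOR_ID_INTEL is 0x8086):

/* assumed expansion of the table helper, from e1000_hw.h of this era */
#define INTEL_E1000_ETHERNET_DEVICE(device_id) \
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id) }

/* MODULE_DEVICE_TABLE(pci, e1000_pci_tbl) additionally exports the ID
 * list in the module image so hotplug/modprobe can autoload e1000 when
 * a matching device appears. */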
@@ -209,13 +152,15 @@ static void
 e1000_clean_rx_ring(struct e1000_adapter *adapter,
 static void e1000_set_multi(struct net_device *netdev);
 static void e1000_update_phy_info(unsigned long data);
 static void e1000_watchdog(unsigned long data);
-static void e1000_watchdog_task(struct e1000_adapter *adapter);
 static void e1000_82547_tx_fifo_stall(unsigned long data);
 static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
 static struct net_device_stats * e1000_get_stats(struct net_device *netdev);
 static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
 static int e1000_set_mac(struct net_device *netdev, void *p);
-static irqreturn_t e1000_intr(int irq, void *data, struct pt_regs *regs);
+static irqreturn_t e1000_intr(int irq, void *data);
+#ifdef CONFIG_PCI_MSI
+static irqreturn_t e1000_intr_msi(int irq, void *data);
+#endif
 static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter,
                                     struct e1000_tx_ring *tx_ring);
 #ifdef CONFIG_E1000_NAPI
@@ -245,45 +190,58 @@ void e1000_set_ethtool_ops(struct net_device *netdev);
 static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
 static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
 static void e1000_tx_timeout(struct net_device *dev);
-static void e1000_tx_timeout_task(struct net_device *dev);
+static void e1000_reset_task(struct work_struct *work);
 static void e1000_smartspeed(struct e1000_adapter *adapter);
-static inline int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
-                                              struct sk_buff *skb);
+static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
+                                       struct sk_buff *skb);
 static void e1000_vlan_rx_register(struct net_device *netdev,
                                    struct vlan_group *grp);
 static void e1000_vlan_rx_add_vid(struct net_device *netdev, uint16_t vid);
 static void e1000_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
 static void e1000_restore_vlan(struct e1000_adapter *adapter);
-#ifdef CONFIG_PM
 static int e1000_suspend(struct pci_dev *pdev, pm_message_t state);
+#ifdef CONFIG_PM
 static int e1000_resume(struct pci_dev *pdev);
 #endif
+static void e1000_shutdown(struct pci_dev *pdev);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /* for netdump / net console */
 static void e1000_netpoll (struct net_device *netdev);
 #endif
-#ifdef CONFIG_E1000_MQ
-/* for multiple Rx queues */
-void e1000_rx_schedule(void *data);
-#endif
-
-/* Exported from other modules */
-
 extern void e1000_check_options(struct e1000_adapter *adapter);
+#define COPYBREAK_DEFAULT 256
+static unsigned int copybreak __read_mostly = COPYBREAK_DEFAULT;
+module_param(copybreak, uint, 0644);
+MODULE_PARM_DESC(copybreak,
+	"Maximum size of packet that is copied to a new buffer on receive");
+
+static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
+                                                pci_channel_state_t state);
+static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev);
+static void e1000_io_resume(struct pci_dev *pdev);
+
+static struct pci_error_handlers e1000_err_handler = {
+	.error_detected = e1000_io_error_detected,
+	.slot_reset = e1000_io_slot_reset,
+	.resume = e1000_io_resume,
+};
+
 static struct pci_driver e1000_driver = {
 	.name = e1000_driver_name,
 	.id_table = e1000_pci_tbl,
 	.probe = e1000_probe,
 	.remove = __devexit_p(e1000_remove),
-	/* Power Managment Hooks */
 #ifdef CONFIG_PM
+	/* Power Managment Hooks */
 	.suspend = e1000_suspend,
-	.resume = e1000_resume
+	.resume = e1000_resume,
 #endif
+	.shutdown = e1000_shutdown,
+	.err_handler = &e1000_err_handler
 };
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -311,8 +269,14 @@ e1000_init_module(void)
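The e1000_err_handler table registered above hooks the driver into the PCI error-recovery framework added in this kernel generation. A sketch of the detect stage, consistent with the pattern these callbacks follow (the actual bodies live further down in e1000_main.c, outside this excerpt):

static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
                                                pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct e1000_adapter *adapter = netdev_priv(netdev);

	/* keep the stack off hardware that just reported an error */
	netif_device_detach(netdev);
	if (netif_running(netdev))
		e1000_down(adapter);
	pci_disable_device(pdev);

	/* ask the core for a slot reset; e1000_io_slot_reset() then
	 * re-enables the device and e1000_io_resume() brings it back up */
	return PCI_ERS_RESULT_NEED_RESET;
}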
printk(KERN_INFO "%s\n", e1000_copyright); - ret = pci_module_init(&e1000_driver); - + ret = pci_register_driver(&e1000_driver); + if (copybreak != COPYBREAK_DEFAULT) { + if (copybreak == 0) + printk(KERN_INFO "e1000: copybreak disabled\n"); + else + printk(KERN_INFO "e1000: copybreak enabled for " + "packets <= %u bytes\n", copybreak); + } return ret; } @@ -333,12 +297,56 @@ e1000_exit_module(void) module_exit(e1000_exit_module); +static int e1000_request_irq(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int flags, err = 0; + + flags = IRQF_SHARED; +#ifdef CONFIG_PCI_MSI + if (adapter->hw.mac_type >= e1000_82571) { + adapter->have_msi = TRUE; + if ((err = pci_enable_msi(adapter->pdev))) { + DPRINTK(PROBE, ERR, + "Unable to allocate MSI interrupt Error: %d\n", err); + adapter->have_msi = FALSE; + } + } + if (adapter->have_msi) { + flags &= ~IRQF_SHARED; + err = request_irq(adapter->pdev->irq, &e1000_intr_msi, flags, + netdev->name, netdev); + if (err) + DPRINTK(PROBE, ERR, + "Unable to allocate interrupt Error: %d\n", err); + } else +#endif + if ((err = request_irq(adapter->pdev->irq, &e1000_intr, flags, + netdev->name, netdev))) + DPRINTK(PROBE, ERR, + "Unable to allocate interrupt Error: %d\n", err); + + return err; +} + +static void e1000_free_irq(struct e1000_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + free_irq(adapter->pdev->irq, netdev); + +#ifdef CONFIG_PCI_MSI + if (adapter->have_msi) + pci_disable_msi(adapter->pdev); +#endif +} + /** * e1000_irq_disable - Mask off interrupt generation on the NIC * @adapter: board private structure **/ -static inline void +static void e1000_irq_disable(struct e1000_adapter *adapter) { atomic_inc(&adapter->irq_sem); @@ -352,7 +360,7 @@ e1000_irq_disable(struct e1000_adapter *adapter) * @adapter: board private structure **/ -static inline void +static void e1000_irq_enable(struct e1000_adapter *adapter) { if (likely(atomic_dec_and_test(&adapter->irq_sem))) { @@ -380,7 +388,8 @@ e1000_update_mng_vlan(struct e1000_adapter *adapter) (vid != old_vid) && !adapter->vlgrp->vlan_devices[old_vid]) e1000_vlan_rx_kill_vid(netdev, old_vid); - } + } else + adapter->mng_vlan_id = vid; } } @@ -391,20 +400,22 @@ e1000_update_mng_vlan(struct e1000_adapter *adapter) * e1000_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. For AMT version (only with 82573) i - * of the f/w this means that the netowrk i/f is closed. - * + * of the f/w this means that the network i/f is closed. 
+ * **/ -static inline void +static void e1000_release_hw_control(struct e1000_adapter *adapter) { uint32_t ctrl_ext; uint32_t swsm; + uint32_t extcnf; /* Let firmware taken over control of h/w */ switch (adapter->hw.mac_type) { case e1000_82571: case e1000_82572: + case e1000_80003es2lan: ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); @@ -413,6 +424,11 @@ e1000_release_hw_control(struct e1000_adapter *adapter) swsm = E1000_READ_REG(&adapter->hw, SWSM); E1000_WRITE_REG(&adapter->hw, SWSM, swsm & ~E1000_SWSM_DRV_LOAD); + case e1000_ich8lan: + extcnf = E1000_READ_REG(&adapter->hw, CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, CTRL_EXT, + extcnf & ~E1000_CTRL_EXT_DRV_LOAD); + break; default: break; } @@ -423,21 +439,24 @@ e1000_release_hw_control(struct e1000_adapter *adapter) * @adapter: address of board private structure * * e1000_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. For AMT version (only with 82573) - * of the f/w this means that the netowrk i/f is open. - * + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. For AMT version (only with 82573) + * of the f/w this means that the network i/f is open. + * **/ -static inline void +static void e1000_get_hw_control(struct e1000_adapter *adapter) { uint32_t ctrl_ext; uint32_t swsm; + uint32_t extcnf; + /* Let firmware know the driver has taken over */ switch (adapter->hw.mac_type) { case e1000_82571: case e1000_82572: + case e1000_80003es2lan: ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); @@ -447,30 +466,74 @@ e1000_get_hw_control(struct e1000_adapter *adapter) E1000_WRITE_REG(&adapter->hw, SWSM, swsm | E1000_SWSM_DRV_LOAD); break; + case e1000_ich8lan: + extcnf = E1000_READ_REG(&adapter->hw, EXTCNF_CTRL); + E1000_WRITE_REG(&adapter->hw, EXTCNF_CTRL, + extcnf | E1000_EXTCNF_CTRL_SWFLAG); + break; default: break; } } +static void +e1000_init_manageability(struct e1000_adapter *adapter) +{ + if (adapter->en_mng_pt) { + uint32_t manc = E1000_READ_REG(&adapter->hw, MANC); + + /* disable hardware interception of ARP */ + manc &= ~(E1000_MANC_ARP_EN); + + /* enable receiving management packets to the host */ + /* this will probably generate destination unreachable messages + * from the host OS, but the packets will be handled on SMBUS */ + if (adapter->hw.has_manc2h) { + uint32_t manc2h = E1000_READ_REG(&adapter->hw, MANC2H); + + manc |= E1000_MANC_EN_MNG2HOST; +#define E1000_MNG2HOST_PORT_623 (1 << 5) +#define E1000_MNG2HOST_PORT_664 (1 << 6) + manc2h |= E1000_MNG2HOST_PORT_623; + manc2h |= E1000_MNG2HOST_PORT_664; + E1000_WRITE_REG(&adapter->hw, MANC2H, manc2h); + } + + E1000_WRITE_REG(&adapter->hw, MANC, manc); + } +} + +static void +e1000_release_manageability(struct e1000_adapter *adapter) +{ + if (adapter->en_mng_pt) { + uint32_t manc = E1000_READ_REG(&adapter->hw, MANC); + + /* re-enable hardware interception of ARP */ + manc |= E1000_MANC_ARP_EN; + + if (adapter->hw.has_manc2h) + manc &= ~E1000_MANC_EN_MNG2HOST; + + /* don't explicitly have to mess with MANC2H since + * MANC has an enable disable that gates MANC2H */ + + E1000_WRITE_REG(&adapter->hw, MANC, manc); + } +} + int e1000_up(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int i, err; + int i; /* hardware has been reset, we need to reload some things */ - /* Reset 
the PHY if it was previously powered down */ - if (adapter->hw.media_type == e1000_media_type_copper) { - uint16_t mii_reg; - e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); - if (mii_reg & MII_CR_POWER_DOWN) - e1000_phy_reset(&adapter->hw); - } - e1000_set_multi(netdev); e1000_restore_vlan(adapter); + e1000_init_manageability(adapter); e1000_configure_tx(adapter); e1000_setup_rctl(adapter); @@ -484,57 +547,101 @@ e1000_up(struct e1000_adapter *adapter) E1000_DESC_UNUSED(ring)); } -#ifdef CONFIG_PCI_MSI - if (adapter->hw.mac_type > e1000_82547_rev_2) { - adapter->have_msi = TRUE; - if ((err = pci_enable_msi(adapter->pdev))) { - DPRINTK(PROBE, ERR, - "Unable to allocate MSI interrupt Error: %d\n", err); - adapter->have_msi = FALSE; - } - } -#endif - if ((err = request_irq(adapter->pdev->irq, &e1000_intr, - SA_SHIRQ | SA_SAMPLE_RANDOM, - netdev->name, netdev))) { - DPRINTK(PROBE, ERR, - "Unable to allocate interrupt Error: %d\n", err); - return err; - } - -#ifdef CONFIG_E1000_MQ - e1000_setup_queue_mapping(adapter); -#endif - adapter->tx_queue_len = netdev->tx_queue_len; - mod_timer(&adapter->watchdog_timer, jiffies); - #ifdef CONFIG_E1000_NAPI netif_poll_enable(netdev); #endif e1000_irq_enable(adapter); + clear_bit(__E1000_DOWN, &adapter->flags); + + /* fire a link change interrupt to start the watchdog */ + E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC); return 0; } +/** + * e1000_power_up_phy - restore link in case the phy was powered down + * @adapter: address of board private structure + * + * The phy may be powered down to save power and turn off link when the + * driver is unloaded and wake on lan is not enabled (among others) + * *** this routine MUST be followed by a call to e1000_reset *** + * + **/ + +void e1000_power_up_phy(struct e1000_adapter *adapter) +{ + uint16_t mii_reg = 0; + + /* Just clear the power down bit to wake the phy back up */ + if (adapter->hw.media_type == e1000_media_type_copper) { + /* according to the manual, the phy will retain its + * settings across a power-down/up cycle */ + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); + mii_reg &= ~MII_CR_POWER_DOWN; + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); + } +} + +static void e1000_power_down_phy(struct e1000_adapter *adapter) +{ + /* Power down the PHY so no link is implied when interface is down * + * The PHY cannot be powered down if any of the following is TRUE * + * (a) WoL is enabled + * (b) AMT is active + * (c) SoL/IDER session is active */ + if (!adapter->wol && adapter->hw.mac_type >= e1000_82540 && + adapter->hw.media_type == e1000_media_type_copper) { + uint16_t mii_reg = 0; + + switch (adapter->hw.mac_type) { + case e1000_82540: + case e1000_82545: + case e1000_82545_rev_3: + case e1000_82546: + case e1000_82546_rev_3: + case e1000_82541: + case e1000_82541_rev_2: + case e1000_82547: + case e1000_82547_rev_2: + if (E1000_READ_REG(&adapter->hw, MANC) & + E1000_MANC_SMBUS_EN) + goto out; + break; + case e1000_82571: + case e1000_82572: + case e1000_82573: + case e1000_80003es2lan: + case e1000_ich8lan: + if (e1000_check_mng_mode(&adapter->hw) || + e1000_check_phy_reset_block(&adapter->hw)) + goto out; + break; + default: + goto out; + } + e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); + mii_reg |= MII_CR_POWER_DOWN; + e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); + mdelay(1); + } +out: + return; +} + void e1000_down(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - boolean_t mng_mode_enabled = (adapter->hw.mac_type >= 
e1000_82571) && - e1000_check_mng_mode(&adapter->hw); + + /* signal that we're down so the interrupt handler does not + * reschedule our watchdog timer */ + set_bit(__E1000_DOWN, &adapter->flags); e1000_irq_disable(adapter); -#ifdef CONFIG_E1000_MQ - while (atomic_read(&adapter->rx_sched_call_data.count) != 0); -#endif - free_irq(adapter->pdev->irq, netdev); -#ifdef CONFIG_PCI_MSI - if (adapter->hw.mac_type > e1000_82547_rev_2 && - adapter->have_msi == TRUE) - pci_disable_msi(adapter->pdev); -#endif + del_timer_sync(&adapter->tx_fifo_stall_timer); del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); @@ -551,63 +658,132 @@ e1000_down(struct e1000_adapter *adapter) e1000_reset(adapter); e1000_clean_all_tx_rings(adapter); e1000_clean_all_rx_rings(adapter); +} - /* Power down the PHY so no link is implied when interface is down * - * The PHY cannot be powered down if any of the following is TRUE * - * (a) WoL is enabled - * (b) AMT is active - * (c) SoL/IDER session is active */ - if (!adapter->wol && adapter->hw.mac_type >= e1000_82540 && - adapter->hw.media_type == e1000_media_type_copper && - !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN) && - !mng_mode_enabled && - !e1000_check_phy_reset_block(&adapter->hw)) { - uint16_t mii_reg; - e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); - mii_reg |= MII_CR_POWER_DOWN; - e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); - mdelay(1); - } +void +e1000_reinit_locked(struct e1000_adapter *adapter) +{ + WARN_ON(in_interrupt()); + while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) + msleep(1); + e1000_down(adapter); + e1000_up(adapter); + clear_bit(__E1000_RESETTING, &adapter->flags); } void e1000_reset(struct e1000_adapter *adapter) { - uint32_t pba, manc; + uint32_t pba = 0, tx_space, min_tx_space, min_rx_space; uint16_t fc_high_water_mark = E1000_FC_HIGH_DIFF; + boolean_t legacy_pba_adjust = FALSE; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. 
*/ switch (adapter->hw.mac_type) { + case e1000_82542_rev2_0: + case e1000_82542_rev2_1: + case e1000_82543: + case e1000_82544: + case e1000_82540: + case e1000_82541: + case e1000_82541_rev_2: + legacy_pba_adjust = TRUE; + pba = E1000_PBA_48K; + break; + case e1000_82545: + case e1000_82545_rev_3: + case e1000_82546: + case e1000_82546_rev_3: + pba = E1000_PBA_48K; + break; case e1000_82547: case e1000_82547_rev_2: + legacy_pba_adjust = TRUE; pba = E1000_PBA_30K; break; case e1000_82571: case e1000_82572: + case e1000_80003es2lan: pba = E1000_PBA_38K; break; case e1000_82573: - pba = E1000_PBA_12K; + pba = E1000_PBA_20K; break; - default: - pba = E1000_PBA_48K; + case e1000_ich8lan: + pba = E1000_PBA_8K; + case e1000_undefined: + case e1000_num_macs: break; } - if ((adapter->hw.mac_type != e1000_82573) && - (adapter->netdev->mtu > E1000_RXBUFFER_8192)) - pba -= 8; /* allocate more FIFO for Tx */ + if (legacy_pba_adjust == TRUE) { + if (adapter->netdev->mtu > E1000_RXBUFFER_8192) + pba -= 8; /* allocate more FIFO for Tx */ + if (adapter->hw.mac_type == e1000_82547) { + adapter->tx_fifo_head = 0; + adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; + adapter->tx_fifo_size = + (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; + atomic_set(&adapter->tx_fifo_stall, 0); + } + } else if (adapter->hw.max_frame_size > MAXIMUM_ETHERNET_FRAME_SIZE) { + /* adjust PBA for jumbo frames */ + E1000_WRITE_REG(&adapter->hw, PBA, pba); + + /* To maintain wire speed transmits, the Tx FIFO should be + * large enough to accomodate two full transmit packets, + * rounded up to the next 1KB and expressed in KB. Likewise, + * the Rx FIFO should be large enough to accomodate at least + * one full receive packet and is similarly rounded up and + * expressed in KB. */ + pba = E1000_READ_REG(&adapter->hw, PBA); + /* upper 16 bits has Tx packet buffer allocation size in KB */ + tx_space = pba >> 16; + /* lower 16 bits has Rx packet buffer allocation size in KB */ + pba &= 0xffff; + /* don't include ethernet FCS because hardware appends/strips */ + min_rx_space = adapter->netdev->mtu + ENET_HEADER_SIZE + + VLAN_TAG_SIZE; + min_tx_space = min_rx_space; + min_tx_space *= 2; + E1000_ROUNDUP(min_tx_space, 1024); + min_tx_space >>= 10; + E1000_ROUNDUP(min_rx_space, 1024); + min_rx_space >>= 10; + + /* If current Tx allocation is less than the min Tx FIFO size, + * and the min Tx FIFO size is less than the current Rx FIFO + * allocation, take space away from current Rx allocation */ + if (tx_space < min_tx_space && + ((min_tx_space - tx_space) < pba)) { + pba = pba - (min_tx_space - tx_space); + + /* PCI/PCIx hardware has PBA alignment constraints */ + switch (adapter->hw.mac_type) { + case e1000_82545 ... e1000_82546_rev_3: + pba &= ~(E1000_PBA_8K - 1); + break; + default: + break; + } - if (adapter->hw.mac_type == e1000_82547) { - adapter->tx_fifo_head = 0; - adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; - adapter->tx_fifo_size = - (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; - atomic_set(&adapter->tx_fifo_stall, 0); + /* if short on rx space, rx wins and must trump tx + * adjustment or use Early Receive if available */ + if (pba < min_rx_space) { + switch (adapter->hw.mac_type) { + case e1000_82573: + /* ERT enabled in e1000_configure_rx */ + break; + default: + pba = min_rx_space; + break; + } + } + } } E1000_WRITE_REG(&adapter->hw, PBA, pba); @@ -616,10 +792,19 @@ e1000_reset(struct e1000_adapter *adapter) /* Set the FC high water mark to 90% of the FIFO size. 
* Required to clear last 3 LSB */ fc_high_water_mark = ((pba * 9216)/10) & 0xFFF8; + /* We can't use 90% on small FIFOs because the remainder + * would be less than 1 full frame. In this case, we size + * it to allow at least a full frame above the high water + * mark. */ + if (pba < E1000_PBA_16K) + fc_high_water_mark = (pba * 1024) - 1600; adapter->hw.fc_high_water = fc_high_water_mark; adapter->hw.fc_low_water = fc_high_water_mark - 8; - adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; + if (adapter->hw.mac_type == e1000_80003es2lan) + adapter->hw.fc_pause_time = 0xFFFF; + else + adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; adapter->hw.fc_send_xon = 1; adapter->hw.fc = adapter->hw.original_fc; @@ -627,19 +812,45 @@ e1000_reset(struct e1000_adapter *adapter) e1000_reset_hw(&adapter->hw); if (adapter->hw.mac_type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, WUC, 0); + if (e1000_init_hw(&adapter->hw)) DPRINTK(PROBE, ERR, "Hardware Error\n"); e1000_update_mng_vlan(adapter); + + /* if (adapter->hwflags & HWFLAGS_PHY_PWR_BIT) { */ + if (adapter->hw.mac_type >= e1000_82544 && + adapter->hw.mac_type <= e1000_82547_rev_2 && + adapter->hw.autoneg == 1 && + adapter->hw.autoneg_advertised == ADVERTISE_1000_FULL) { + uint32_t ctrl = E1000_READ_REG(&adapter->hw, CTRL); + /* clear phy power management bit if we are in gig only mode, + * which if enabled will attempt negotiation to 100Mb, which + * can cause a loss of link at power off or driver unload */ + ctrl &= ~E1000_CTRL_SWDPIN3; + E1000_WRITE_REG(&adapter->hw, CTRL, ctrl); + } + /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE); e1000_reset_adaptive(&adapter->hw); e1000_phy_get_info(&adapter->hw, &adapter->phy_info); - if (adapter->en_mng_pt) { - manc = E1000_READ_REG(&adapter->hw, MANC); - manc |= (E1000_MANC_ARP_EN | E1000_MANC_EN_MNG2HOST); - E1000_WRITE_REG(&adapter->hw, MANC, manc); + + if (!adapter->smart_power_down && + (adapter->hw.mac_type == e1000_82571 || + adapter->hw.mac_type == e1000_82572)) { + uint16_t phy_data = 0; + /* speed up time to link by disabling smart power down, ignore + * the return value of this function because there is nothing + * different we would do if it failed */ + e1000_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, + &phy_data); + phy_data &= ~IGP02E1000_PM_SPD; + e1000_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, + phy_data); } + + e1000_release_manageability(adapter); } /** @@ -661,34 +872,37 @@ e1000_probe(struct pci_dev *pdev, struct net_device *netdev; struct e1000_adapter *adapter; unsigned long mmio_start, mmio_len; + unsigned long flash_start, flash_len; static int cards_found = 0; + static int global_quad_port_a = 0; /* global ksp3 port a indication */ int i, err, pci_using_dac; - uint16_t eeprom_data; + uint16_t eeprom_data = 0; uint16_t eeprom_apme_mask = E1000_EEPROM_APME; if ((err = pci_enable_device(pdev))) return err; - if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { + if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) && + !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) { pci_using_dac = 1; } else { - if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { + if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) && + (err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK))) { E1000_ERR("No usable DMA configuration, aborting\n"); - return err; + goto err_dma; } pci_using_dac = 0; } if ((err = pci_request_regions(pdev, e1000_driver_name))) - return err; + goto err_pci_reg; 
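Looking back at the flow-control changes in the e1000_reset() hunk above, a worked example makes the watermark arithmetic concrete (illustrative numbers, not patch text):

/* 48KB packet buffer => pba = 48:
 *	fc_high_water_mark = ((48 * 9216) / 10) & 0xFFF8
 *	                   = 44236 & 0xFFF8 = 44232 bytes,
 * i.e. 90% of the FIFO rounded down to the 8-byte boundary the hardware
 * requires, with fc_low_water 8 bytes below at 44224. For a FIFO smaller
 * than 16KB (e.g. ICH8's pba = 8), 90% would leave less than one
 * max-size frame of headroom, hence the new override:
 *	fc_high_water_mark = (8 * 1024) - 1600 = 6592 bytes,
 * which keeps 1600 bytes, a full frame plus slack, above high water. */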
pci_set_master(pdev); + err = -ENOMEM; netdev = alloc_etherdev(sizeof(struct e1000_adapter)); - if (!netdev) { - err = -ENOMEM; + if (!netdev) goto err_alloc_etherdev; - } SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &pdev->dev); @@ -703,11 +917,10 @@ e1000_probe(struct pci_dev *pdev, mmio_start = pci_resource_start(pdev, BAR_0); mmio_len = pci_resource_len(pdev, BAR_0); + err = -EIO; adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); - if (!adapter->hw.hw_addr) { - err = -EIO; + if (!adapter->hw.hw_addr) goto err_ioremap; - } for (i = BAR_1; i <= BAR_5; i++) { if (pci_resource_len(pdev, i) == 0) @@ -739,7 +952,7 @@ e1000_probe(struct pci_dev *pdev, #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = e1000_netpoll; #endif - strcpy(netdev->name, pci_name(pdev)); + strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len; @@ -752,7 +965,19 @@ e1000_probe(struct pci_dev *pdev, if ((err = e1000_sw_init(adapter))) goto err_sw_init; - if ((err = e1000_check_phy_reset_block(&adapter->hw))) + err = -EIO; + /* Flash BAR mapping must happen after e1000_sw_init + * because it depends on mac_type */ + if ((adapter->hw.mac_type == e1000_ich8lan) && + (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { + flash_start = pci_resource_start(pdev, 1); + flash_len = pci_resource_len(pdev, 1); + adapter->hw.flash_address = ioremap(flash_start, flash_len); + if (!adapter->hw.flash_address) + goto err_flashmap; + } + + if (e1000_check_phy_reset_block(&adapter->hw)) DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); if (adapter->hw.mac_type >= e1000_82543) { @@ -761,6 +986,8 @@ e1000_probe(struct pci_dev *pdev, NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; + if (adapter->hw.mac_type == e1000_ich8lan) + netdev->features &= ~NETIF_F_HW_VLAN_FILTER; } #ifdef NETIF_F_TSO @@ -768,19 +995,25 @@ e1000_probe(struct pci_dev *pdev, (adapter->hw.mac_type != e1000_82547)) netdev->features |= NETIF_F_TSO; -#ifdef NETIF_F_TSO_IPV6 +#ifdef NETIF_F_TSO6 if (adapter->hw.mac_type > e1000_82547_rev_2) - netdev->features |= NETIF_F_TSO_IPV6; + netdev->features |= NETIF_F_TSO6; #endif #endif if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - /* hard_start_xmit is safe against parallel locking */ - netdev->features |= NETIF_F_LLTX; - + netdev->features |= NETIF_F_LLTX; + adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); + /* initialize eeprom parameters */ + + if (e1000_init_eeprom_params(&adapter->hw)) { + E1000_ERR("EEPROM initialization failed\n"); + goto err_eeprom; + } + /* before reading the EEPROM, reset the controller to * put the device in a known good starting state */ @@ -790,7 +1023,6 @@ e1000_probe(struct pci_dev *pdev, if (e1000_validate_eeprom_checksum(&adapter->hw) < 0) { DPRINTK(PROBE, ERR, "The EEPROM Checksum Is Not Valid\n"); - err = -EIO; goto err_eeprom; } @@ -803,12 +1035,9 @@ e1000_probe(struct pci_dev *pdev, if (!is_valid_ether_addr(netdev->perm_addr)) { DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); - err = -EIO; goto err_eeprom; } - e1000_read_part_num(&adapter->hw, &(adapter->part_num)); - e1000_get_bus_info(&adapter->hw); init_timer(&adapter->tx_fifo_stall_timer); @@ -819,20 +1048,11 @@ e1000_probe(struct pci_dev *pdev, adapter->watchdog_timer.function = &e1000_watchdog; adapter->watchdog_timer.data = (unsigned long) adapter; - INIT_WORK(&adapter->watchdog_task, - (void (*)(void *))e1000_watchdog_task, adapter); - init_timer(&adapter->phy_info_timer); 
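An API note on the handler conversions in this probe hunk: e1000_reset_task() replaces the old tx_timeout_task/watchdog_task pair and uses the two-argument INIT_WORK() of this kernel generation, so the handler receives the work_struct itself and recovers the adapter with container_of(). A minimal sketch of the shape this implies:

static void e1000_reset_task(struct work_struct *work)
{
	/* no separate data pointer anymore; back out to the adapter
	 * from the embedded work_struct */
	struct e1000_adapter *adapter =
		container_of(work, struct e1000_adapter, reset_task);

	/* e1000_reinit_locked() serializes the down/up cycle on the
	 * __E1000_RESETTING bit so concurrent resets cannot overlap */
	e1000_reinit_locked(adapter);
}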
adapter->phy_info_timer.function = &e1000_update_phy_info; adapter->phy_info_timer.data = (unsigned long) adapter; - INIT_WORK(&adapter->tx_timeout_task, - (void (*)(void *))e1000_tx_timeout_task, netdev); - - /* we're going to reset, so assume we have no link for now */ - - netif_carrier_off(netdev); - netif_stop_queue(netdev); + INIT_WORK(&adapter->reset_task, e1000_reset_task); e1000_check_options(adapter); @@ -851,9 +1071,15 @@ e1000_probe(struct pci_dev *pdev, EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data); eeprom_apme_mask = E1000_EEPROM_82544_APM; break; + case e1000_ich8lan: + e1000_read_eeprom(&adapter->hw, + EEPROM_INIT_CONTROL1_REG, 1, &eeprom_data); + eeprom_apme_mask = E1000_EEPROM_ICH8_APME; + break; case e1000_82546: case e1000_82546_rev_3: case e1000_82571: + case e1000_80003es2lan: if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1){ e1000_read_eeprom(&adapter->hw, EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); @@ -866,7 +1092,39 @@ e1000_probe(struct pci_dev *pdev, break; } if (eeprom_data & eeprom_apme_mask) - adapter->wol |= E1000_WUFC_MAG; + adapter->eeprom_wol |= E1000_WUFC_MAG; + + /* now that we have the eeprom settings, apply the special cases + * where the eeprom may be wrong or the board simply won't support + * wake on lan on a particular port */ + switch (pdev->device) { + case E1000_DEV_ID_82546GB_PCIE: + adapter->eeprom_wol = 0; + break; + case E1000_DEV_ID_82546EB_FIBER: + case E1000_DEV_ID_82546GB_FIBER: + case E1000_DEV_ID_82571EB_FIBER: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1) + adapter->eeprom_wol = 0; + break; + case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: + case E1000_DEV_ID_82571EB_QUAD_COPPER: + case E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->eeprom_wol = 0; + else + adapter->quad_port_a = 1; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + } + + /* initialize the wol settings based on the eeprom settings */ + adapter->wol = adapter->eeprom_wol; /* print bus type/speed/width info */ { @@ -903,19 +1161,43 @@ e1000_probe(struct pci_dev *pdev, if ((err = register_netdev(netdev))) goto err_register; + /* tell the stack to leave us alone until e1000_open() is called */ + netif_carrier_off(netdev); + netif_stop_queue(netdev); + DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); cards_found++; return 0; err_register: -err_sw_init: + e1000_release_hw_control(adapter); err_eeprom: + if (!e1000_check_phy_reset_block(&adapter->hw)) + e1000_phy_hw_reset(&adapter->hw); + + if (adapter->hw.flash_address) + iounmap(adapter->hw.flash_address); +err_flashmap: +#ifdef CONFIG_E1000_NAPI + for (i = 0; i < adapter->num_rx_queues; i++) + dev_put(&adapter->polling_netdev[i]); +#endif + + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); +#ifdef CONFIG_E1000_NAPI + kfree(adapter->polling_netdev); +#endif +err_sw_init: iounmap(adapter->hw.hw_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_regions(pdev); +err_pci_reg: +err_dma: + pci_disable_device(pdev); return err; } @@ -934,21 +1216,13 @@ e1000_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - uint32_t manc; #ifdef CONFIG_E1000_NAPI int i; #endif flush_scheduled_work(); - if (adapter->hw.mac_type >= 
e1000_82540 && - adapter->hw.media_type == e1000_media_type_copper) { - manc = E1000_READ_REG(&adapter->hw, MANC); - if (manc & E1000_MANC_SMBUS_EN) { - manc |= E1000_MANC_ARP_EN; - E1000_WRITE_REG(&adapter->hw, MANC, manc); - } - } + e1000_release_manageability(adapter); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ @@ -957,7 +1231,7 @@ e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); #ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) - __dev_put(&adapter->polling_netdev[i]); + dev_put(&adapter->polling_netdev[i]); #endif if (!e1000_check_phy_reset_block(&adapter->hw)) @@ -970,12 +1244,10 @@ e1000_remove(struct pci_dev *pdev) #endif iounmap(adapter->hw.hw_addr); + if (adapter->hw.flash_address) + iounmap(adapter->hw.flash_address); pci_release_regions(pdev); -#ifdef CONFIG_E1000_MQ - free_percpu(adapter->cpu_netdev); - free_percpu(adapter->cpu_tx_ring); -#endif free_netdev(netdev); pci_disable_device(pdev); @@ -1011,8 +1283,8 @@ e1000_sw_init(struct e1000_adapter *adapter) pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word); - adapter->rx_buffer_len = E1000_RXBUFFER_2048; - adapter->rx_ps_bsize0 = E1000_RXBUFFER_256; + adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; + adapter->rx_ps_bsize0 = E1000_RXBUFFER_128; hw->max_frame_size = netdev->mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE; @@ -1024,13 +1296,6 @@ e1000_sw_init(struct e1000_adapter *adapter) return -EIO; } - /* initialize eeprom parameters */ - - if (e1000_init_eeprom_params(hw)) { - E1000_ERR("EEPROM initialization failed\n"); - return -EIO; - } - switch (hw->mac_type) { default: break; @@ -1056,40 +1321,8 @@ e1000_sw_init(struct e1000_adapter *adapter) hw->master_slave = E1000_MASTER_SLAVE; } -#ifdef CONFIG_E1000_MQ - /* Number of supported queues */ - switch (hw->mac_type) { - case e1000_82571: - case e1000_82572: - /* These controllers support 2 tx queues, but with a single - * qdisc implementation, multiple tx queues aren't quite as - * interesting. If we can find a logical way of mapping - * flows to a queue, then perhaps we can up the num_tx_queue - * count back to its default. Until then, we run the risk of - * terrible performance due to SACK overload. */ - adapter->num_tx_queues = 1; - adapter->num_rx_queues = 2; - break; - default: - adapter->num_tx_queues = 1; - adapter->num_rx_queues = 1; - break; - } - adapter->num_rx_queues = min(adapter->num_rx_queues, num_online_cpus()); - adapter->num_tx_queues = min(adapter->num_tx_queues, num_online_cpus()); - DPRINTK(DRV, INFO, "Multiqueue Enabled: Rx Queue count = %u %s\n", - adapter->num_rx_queues, - ((adapter->num_rx_queues == 1) - ? ((num_online_cpus() > 1) - ? 
"(due to unsupported feature in current adapter)" - : "(due to unsupported system configuration)") - : "")); - DPRINTK(DRV, INFO, "Multiqueue Enabled: Tx Queue count = %u\n", - adapter->num_tx_queues); -#else adapter->num_tx_queues = 1; adapter->num_rx_queues = 1; -#endif if (e1000_alloc_queues(adapter)) { DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); @@ -1110,6 +1343,8 @@ e1000_sw_init(struct e1000_adapter *adapter) atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->stats_lock); + set_bit(__E1000_DOWN, &adapter->flags); + return 0; } @@ -1152,51 +1387,9 @@ e1000_alloc_queues(struct e1000_adapter *adapter) memset(adapter->polling_netdev, 0, size); #endif -#ifdef CONFIG_E1000_MQ - adapter->rx_sched_call_data.func = e1000_rx_schedule; - adapter->rx_sched_call_data.info = adapter->netdev; - - adapter->cpu_netdev = alloc_percpu(struct net_device *); - adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); -#endif - return E1000_SUCCESS; } -#ifdef CONFIG_E1000_MQ -static void __devinit -e1000_setup_queue_mapping(struct e1000_adapter *adapter) -{ - int i, cpu; - - adapter->rx_sched_call_data.func = e1000_rx_schedule; - adapter->rx_sched_call_data.info = adapter->netdev; - cpus_clear(adapter->rx_sched_call_data.cpumask); - - adapter->cpu_netdev = alloc_percpu(struct net_device *); - adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); - - lock_cpu_hotplug(); - i = 0; - for_each_online_cpu(cpu) { - *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = &adapter->tx_ring[i % adapter->num_tx_queues]; - /* This is incomplete because we'd like to assign separate - * physical cpus to these netdev polling structures and - * avoid saturating a subset of cpus. - */ - if (i < adapter->num_rx_queues) { - *per_cpu_ptr(adapter->cpu_netdev, cpu) = &adapter->polling_netdev[i]; - adapter->rx_ring[i].cpu = cpu; - cpu_set(cpu, adapter->cpumask); - } else - *per_cpu_ptr(adapter->cpu_netdev, cpu) = NULL; - - i++; - } - unlock_cpu_hotplug(); -} -#endif - /** * e1000_open - Called when a network interface is made active * @netdev: network interface device structure @@ -1216,16 +1409,24 @@ e1000_open(struct net_device *netdev) struct e1000_adapter *adapter = netdev_priv(netdev); int err; - /* allocate transmit descriptors */ + /* disallow open during test */ + if (test_bit(__E1000_TESTING, &adapter->flags)) + return -EBUSY; + /* allocate transmit descriptors */ if ((err = e1000_setup_all_tx_resources(adapter))) goto err_setup_tx; /* allocate receive descriptors */ - if ((err = e1000_setup_all_rx_resources(adapter))) goto err_setup_rx; + err = e1000_request_irq(adapter); + if (err) + goto err_req_irq; + + e1000_power_up_phy(adapter); + if ((err = e1000_up(adapter))) goto err_up; adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; @@ -1243,6 +1444,9 @@ e1000_open(struct net_device *netdev) return E1000_SUCCESS; err_up: + e1000_power_down_phy(adapter); + e1000_free_irq(adapter); +err_req_irq: e1000_free_all_rx_resources(adapter); err_setup_rx: e1000_free_all_tx_resources(adapter); @@ -1269,13 +1473,20 @@ e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); + WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); + e1000_power_down_phy(adapter); + e1000_free_irq(adapter); e1000_free_all_tx_resources(adapter); e1000_free_all_rx_resources(adapter); + /* kill manageability vlan ID if supported, but not if a vlan with + * the same ID is registered on the host OS (let 8021q kill it) */ if ((adapter->hw.mng_cookie.status & - 
E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) { + E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) && + !(adapter->vlgrp && + adapter->vlgrp->vlan_devices[adapter->mng_vlan_id])) { e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); } @@ -1294,7 +1505,7 @@ e1000_close(struct net_device *netdev) * @start: address of beginning of memory * @len: length of memory **/ -static inline boolean_t +static boolean_t e1000_check_64k_bound(struct e1000_adapter *adapter, void *start, unsigned long len) { @@ -1327,8 +1538,7 @@ e1000_setup_tx_resources(struct e1000_adapter *adapter, int size; size = sizeof(struct e1000_buffer) * txdr->count; - - txdr->buffer_info = vmalloc_node(size, pcibus_to_node(pdev->bus)); + txdr->buffer_info = vmalloc(size); if (!txdr->buffer_info) { DPRINTK(PROBE, ERR, "Unable to allocate memory for the transmit descriptor ring\n"); @@ -1393,10 +1603,6 @@ setup_tx_desc_die: * (Descriptors) for all queues * @adapter: board private structure * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. - * * Return 0 on success, negative on failure **/ @@ -1410,6 +1616,9 @@ e1000_setup_all_tx_resources(struct e1000_adapter *adapter) if (err) { DPRINTK(PROBE, ERR, "Allocation for Tx Queue %u failed\n", i); + for (i-- ; i >= 0; i--) + e1000_free_tx_resources(adapter, + &adapter->tx_ring[i]); break; } } @@ -1435,37 +1644,25 @@ e1000_configure_tx(struct e1000_adapter *adapter) /* Setup the HW Tx Head and Tail descriptor pointers */ switch (adapter->num_tx_queues) { - case 2: - tdba = adapter->tx_ring[1].dma; - tdlen = adapter->tx_ring[1].count * - sizeof(struct e1000_tx_desc); - E1000_WRITE_REG(hw, TDBAL1, (tdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, TDBAH1, (tdba >> 32)); - E1000_WRITE_REG(hw, TDLEN1, tdlen); - E1000_WRITE_REG(hw, TDH1, 0); - E1000_WRITE_REG(hw, TDT1, 0); - adapter->tx_ring[1].tdh = E1000_TDH1; - adapter->tx_ring[1].tdt = E1000_TDT1; - /* Fall Through */ case 1: default: tdba = adapter->tx_ring[0].dma; tdlen = adapter->tx_ring[0].count * sizeof(struct e1000_tx_desc); - E1000_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, TDBAH, (tdba >> 32)); E1000_WRITE_REG(hw, TDLEN, tdlen); - E1000_WRITE_REG(hw, TDH, 0); + E1000_WRITE_REG(hw, TDBAH, (tdba >> 32)); + E1000_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL)); E1000_WRITE_REG(hw, TDT, 0); - adapter->tx_ring[0].tdh = E1000_TDH; - adapter->tx_ring[0].tdt = E1000_TDT; + E1000_WRITE_REG(hw, TDH, 0); + adapter->tx_ring[0].tdh = ((hw->mac_type >= e1000_82543) ? E1000_TDH : E1000_82542_TDH); + adapter->tx_ring[0].tdt = ((hw->mac_type >= e1000_82543) ? 
E1000_TDT : E1000_82542_TDT); break; } /* Set the default values for the Tx Inter Packet Gap timer */ - - if (hw->media_type == e1000_media_type_fiber || - hw->media_type == e1000_media_type_internal_serdes) + if (adapter->hw.mac_type <= e1000_82547_rev_2 && + (hw->media_type == e1000_media_type_fiber || + hw->media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; @@ -1477,6 +1674,10 @@ e1000_configure_tx(struct e1000_adapter *adapter) ipgr1 = DEFAULT_82542_TIPG_IPGR1; ipgr2 = DEFAULT_82542_TIPG_IPGR2; break; + case e1000_80003es2lan: + ipgr1 = DEFAULT_82543_TIPG_IPGR1; + ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; + break; default: ipgr1 = DEFAULT_82543_TIPG_IPGR1; ipgr2 = DEFAULT_82543_TIPG_IPGR2; @@ -1495,31 +1696,33 @@ e1000_configure_tx(struct e1000_adapter *adapter) /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, TCTL); - tctl &= ~E1000_TCTL_CT; - tctl |= E1000_TCTL_EN | E1000_TCTL_PSP | E1000_TCTL_RTLC | + tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); - E1000_WRITE_REG(hw, TCTL, tctl); - if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) { tarc = E1000_READ_REG(hw, TARC0); - tarc |= ((1 << 25) | (1 << 21)); + /* set the speed mode bit, we'll clear it if we're not at + * gigabit link later */ + tarc |= (1 << 21); + E1000_WRITE_REG(hw, TARC0, tarc); + } else if (hw->mac_type == e1000_80003es2lan) { + tarc = E1000_READ_REG(hw, TARC0); + tarc |= 1; E1000_WRITE_REG(hw, TARC0, tarc); tarc = E1000_READ_REG(hw, TARC1); - tarc |= (1 << 25); - if (tctl & E1000_TCTL_MULR) - tarc &= ~(1 << 28); - else - tarc |= (1 << 28); + tarc |= 1; E1000_WRITE_REG(hw, TARC1, tarc); } e1000_config_collision_dist(hw); /* Setup Transmit Descriptor Settings for eop descriptor */ - adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP | - E1000_TXD_CMD_IFCS; + adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS; + + /* only set IDE if we are delaying interrupts using the timers */ + if (adapter->tx_int_delay) + adapter->txd_cmd |= E1000_TXD_CMD_IDE; if (hw->mac_type < e1000_82543) adapter->txd_cmd |= E1000_TXD_CMD_RPS; @@ -1531,6 +1734,9 @@ e1000_configure_tx(struct e1000_adapter *adapter) if (hw->mac_type == e1000_82544 && hw->bus_type == e1000_bus_type_pcix) adapter->pcix_82544 = 1; + + E1000_WRITE_REG(hw, TCTL, tctl); + } /** @@ -1549,7 +1755,7 @@ e1000_setup_rx_resources(struct e1000_adapter *adapter, int size, desc_len; size = sizeof(struct e1000_buffer) * rxdr->count; - rxdr->buffer_info = vmalloc_node(size, pcibus_to_node(pdev->bus)); + rxdr->buffer_info = vmalloc(size); if (!rxdr->buffer_info) { DPRINTK(PROBE, ERR, "Unable to allocate memory for the receive descriptor ring\n"); @@ -1644,10 +1850,6 @@ setup_rx_desc_die: * (Descriptors) for all queues * @adapter: board private structure * - * If this function returns with an error, then it's possible one or - * more of the rings is populated (while the rest are not). It is the - * callers duty to clean those orphaned rings. 
- * * Return 0 on success, negative on failure **/ @@ -1661,6 +1863,9 @@ e1000_setup_all_rx_resources(struct e1000_adapter *adapter) if (err) { DPRINTK(PROBE, ERR, "Allocation for Rx Queue %u failed\n", i); + for (i-- ; i >= 0; i--) + e1000_free_rx_resources(adapter, + &adapter->rx_ring[i]); break; } } @@ -1691,9 +1896,6 @@ e1000_setup_rctl(struct e1000_adapter *adapter) E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT); - if (adapter->hw.mac_type > e1000_82543) - rctl |= E1000_RCTL_SECRC; - if (adapter->hw.tbi_compatibility_on == 1) rctl |= E1000_RCTL_SBP; else @@ -1705,14 +1907,21 @@ e1000_setup_rctl(struct e1000_adapter *adapter) rctl |= E1000_RCTL_LPE; /* Setup buffer sizes */ - if (adapter->hw.mac_type >= e1000_82571) { - /* We can now specify buffers in 1K increments. - * BSIZE and BSEX are ignored in this case. */ - rctl |= adapter->rx_buffer_len << 0x11; - } else { - rctl &= ~E1000_RCTL_SZ_4096; - rctl |= E1000_RCTL_BSEX; - switch (adapter->rx_buffer_len) { + rctl &= ~E1000_RCTL_SZ_4096; + rctl |= E1000_RCTL_BSEX; + switch (adapter->rx_buffer_len) { + case E1000_RXBUFFER_256: + rctl |= E1000_RCTL_SZ_256; + rctl &= ~E1000_RCTL_BSEX; + break; + case E1000_RXBUFFER_512: + rctl |= E1000_RCTL_SZ_512; + rctl &= ~E1000_RCTL_BSEX; + break; + case E1000_RXBUFFER_1024: + rctl |= E1000_RCTL_SZ_1024; + rctl &= ~E1000_RCTL_BSEX; + break; case E1000_RXBUFFER_2048: default: rctl |= E1000_RCTL_SZ_2048; @@ -1727,7 +1936,6 @@ e1000_setup_rctl(struct e1000_adapter *adapter) case E1000_RXBUFFER_16384: rctl |= E1000_RCTL_SZ_16384; break; - } } #ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT @@ -1738,9 +1946,11 @@ e1000_setup_rctl(struct e1000_adapter *adapter) * followed by the page buffers. Therefore, skb->data is * sized to hold the largest protocol header. 
*/ + /* allocations using alloc_page take too long for regular MTU + * so only enable packet split for jumbo frames */ pages = PAGE_USE_COUNT(adapter->netdev->mtu); - if ((adapter->hw.mac_type > e1000_82547_rev_2) && (pages <= 3) && - PAGE_SIZE <= 16384) + if ((adapter->hw.mac_type >= e1000_82571) && (pages <= 3) && + PAGE_SIZE <= 16384 && (rctl & E1000_RCTL_LPE)) adapter->rx_ps_pages = pages; else adapter->rx_ps_pages = 0; @@ -1749,11 +1959,14 @@ e1000_setup_rctl(struct e1000_adapter *adapter) /* Configure extra packet-split registers */ rfctl = E1000_READ_REG(&adapter->hw, RFCTL); rfctl |= E1000_RFCTL_EXTEN; - /* disable IPv6 packet split support */ - rfctl |= E1000_RFCTL_IPV6_DIS; + /* disable packet split support for IPv6 extension headers, + * because some malformed IPv6 headers can hang the RX */ + rfctl |= (E1000_RFCTL_IPV6_EX_DIS | + E1000_RFCTL_NEW_IPV6_EXT_DIS); + E1000_WRITE_REG(&adapter->hw, RFCTL, rfctl); - rctl |= E1000_RCTL_DTYP_PS | E1000_RCTL_SECRC; + rctl |= E1000_RCTL_DTYP_PS; psrctl |= adapter->rx_ps_bsize0 >> E1000_PSRCTL_BSIZE0_SHIFT; @@ -1790,12 +2003,9 @@ e1000_configure_rx(struct e1000_adapter *adapter) uint64_t rdba; struct e1000_hw *hw = &adapter->hw; uint32_t rdlen, rctl, rxcsum, ctrl_ext; -#ifdef CONFIG_E1000_MQ - uint32_t reta, mrqc; - int i; -#endif if (adapter->rx_ps_pages) { + /* this is a 32 byte descriptor */ rdlen = adapter->rx_ring[0].count * sizeof(union e1000_rx_desc_packet_split); adapter->clean_rx = e1000_clean_rx_irq_ps; @@ -1816,7 +2026,7 @@ e1000_configure_rx(struct e1000_adapter *adapter) if (hw->mac_type >= e1000_82540) { E1000_WRITE_REG(hw, RADV, adapter->rx_abs_int_delay); - if (adapter->itr > 1) + if (adapter->itr_setting != 0) E1000_WRITE_REG(hw, ITR, 1000000000 / (adapter->itr * 256)); } @@ -1824,84 +2034,32 @@ e1000_configure_rx(struct e1000_adapter *adapter) if (hw->mac_type >= e1000_82571) { ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); /* Reset delay timers after every interrupt */ - ctrl_ext |= E1000_CTRL_EXT_CANC; + ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; #ifdef CONFIG_E1000_NAPI - /* Auto-Mask interrupts upon ICR read. */ + /* Auto-Mask interrupts upon ICR access */ ctrl_ext |= E1000_CTRL_EXT_IAME; + E1000_WRITE_REG(hw, IAM, 0xffffffff); #endif E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); - E1000_WRITE_REG(hw, IAM, ~0); E1000_WRITE_FLUSH(hw); } /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ switch (adapter->num_rx_queues) { -#ifdef CONFIG_E1000_MQ - case 2: - rdba = adapter->rx_ring[1].dma; - E1000_WRITE_REG(hw, RDBAL1, (rdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, RDBAH1, (rdba >> 32)); - E1000_WRITE_REG(hw, RDLEN1, rdlen); - E1000_WRITE_REG(hw, RDH1, 0); - E1000_WRITE_REG(hw, RDT1, 0); - adapter->rx_ring[1].rdh = E1000_RDH1; - adapter->rx_ring[1].rdt = E1000_RDT1; - /* Fall Through */ -#endif case 1: default: rdba = adapter->rx_ring[0].dma; - E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL)); - E1000_WRITE_REG(hw, RDBAH, (rdba >> 32)); E1000_WRITE_REG(hw, RDLEN, rdlen); - E1000_WRITE_REG(hw, RDH, 0); + E1000_WRITE_REG(hw, RDBAH, (rdba >> 32)); + E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL)); E1000_WRITE_REG(hw, RDT, 0); - adapter->rx_ring[0].rdh = E1000_RDH; - adapter->rx_ring[0].rdt = E1000_RDT; + E1000_WRITE_REG(hw, RDH, 0); + adapter->rx_ring[0].rdh = ((hw->mac_type >= e1000_82543) ? E1000_RDH : E1000_82542_RDH); + adapter->rx_ring[0].rdt = ((hw->mac_type >= e1000_82543) ? 
E1000_RDT : E1000_82542_RDT); break; } -#ifdef CONFIG_E1000_MQ - if (adapter->num_rx_queues > 1) { - uint32_t random[10]; - - get_random_bytes(&random[0], 40); - - if (hw->mac_type <= e1000_82572) { - E1000_WRITE_REG(hw, RSSIR, 0); - E1000_WRITE_REG(hw, RSSIM, 0); - } - - switch (adapter->num_rx_queues) { - case 2: - default: - reta = 0x00800080; - mrqc = E1000_MRQC_ENABLE_RSS_2Q; - break; - } - - /* Fill out redirection table */ - for (i = 0; i < 32; i++) - E1000_WRITE_REG_ARRAY(hw, RETA, i, reta); - /* Fill out hash function seeds */ - for (i = 0; i < 10; i++) - E1000_WRITE_REG_ARRAY(hw, RSSRK, i, random[i]); - - mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP); - E1000_WRITE_REG(hw, MRQC, mrqc); - } - - /* Multiqueue and packet checksumming are mutually exclusive. */ - if (hw->mac_type >= e1000_82571) { - rxcsum = E1000_READ_REG(hw, RXCSUM); - rxcsum |= E1000_RXCSUM_PCSD; - E1000_WRITE_REG(hw, RXCSUM, rxcsum); - } - -#else - /* Enable 82543 Receive Checksum Offload for TCP and UDP */ if (hw->mac_type >= e1000_82543) { rxcsum = E1000_READ_REG(hw, RXCSUM); @@ -1920,10 +2078,12 @@ e1000_configure_rx(struct e1000_adapter *adapter) } E1000_WRITE_REG(hw, RXCSUM, rxcsum); } -#endif /* CONFIG_E1000_MQ */ + /* enable early receives on 82573, only takes effect if using > 2048 + * byte total frame size. for example only for jumbo frames */ +#define E1000_ERT_2048 0x100 if (hw->mac_type == e1000_82573) - E1000_WRITE_REG(hw, ERT, 0x0100); + E1000_WRITE_REG(hw, ERT, E1000_ERT_2048); /* Enable Receives */ E1000_WRITE_REG(hw, RCTL, rctl); @@ -1969,7 +2129,7 @@ e1000_free_all_tx_resources(struct e1000_adapter *adapter) e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); } -static inline void +static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, struct e1000_buffer *buffer_info) { @@ -1978,10 +2138,13 @@ e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, buffer_info->dma, buffer_info->length, PCI_DMA_TODEVICE); + buffer_info->dma = 0; } - if (buffer_info->skb) + if (buffer_info->skb) { dev_kfree_skb_any(buffer_info->skb); - memset(buffer_info, 0, sizeof(struct e1000_buffer)); + buffer_info->skb = NULL; + } + /* buffer_info must be completely set up in the transmit path */ } /** @@ -2264,6 +2427,12 @@ e1000_set_multi(struct net_device *netdev) uint32_t rctl; uint32_t hash_value; int i, rar_entries = E1000_RAR_ENTRIES; + int mta_reg_count = (hw->mac_type == e1000_ich8lan) ? 
+ E1000_NUM_MTA_REGISTERS_ICH8LAN : + E1000_NUM_MTA_REGISTERS; + + if (adapter->hw.mac_type == e1000_ich8lan) + rar_entries = E1000_RAR_ENTRIES_ICH8LAN; /* reserve RAR[14] for LAA over-write work-around */ if (adapter->hw.mac_type == e1000_82571) @@ -2302,14 +2471,18 @@ e1000_set_multi(struct net_device *netdev) mc_ptr = mc_ptr->next; } else { E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0); + E1000_WRITE_FLUSH(hw); E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0); + E1000_WRITE_FLUSH(hw); } } /* clear the old settings from the multicast hash table */ - for (i = 0; i < E1000_NUM_MTA_REGISTERS; i++) + for (i = 0; i < mta_reg_count; i++) { E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); + E1000_WRITE_FLUSH(hw); + } /* load any remaining addresses into the hash table */ @@ -2382,19 +2555,20 @@ static void e1000_watchdog(unsigned long data) { struct e1000_adapter *adapter = (struct e1000_adapter *) data; - - /* Do the rest outside of interrupt context */ - schedule_work(&adapter->watchdog_task); -} - -static void -e1000_watchdog_task(struct e1000_adapter *adapter) -{ struct net_device *netdev = adapter->netdev; struct e1000_tx_ring *txdr = adapter->tx_ring; - uint32_t link; + uint32_t link, tctl; + int32_t ret_val; + + ret_val = e1000_check_for_link(&adapter->hw); + if ((ret_val == E1000_ERR_PHY) && + (adapter->hw.phy_type == e1000_phy_igp_3) && + (E1000_READ_REG(&adapter->hw, CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { + /* See e1000_kumeran_lock_loss_workaround() */ + DPRINTK(LINK, INFO, + "Gigabit has been disabled, downgrading speed\n"); + } - e1000_check_for_link(&adapter->hw); if (adapter->hw.mac_type == e1000_82573) { e1000_enable_tx_pkt_filtering(&adapter->hw); if (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id) @@ -2409,6 +2583,7 @@ e1000_watchdog_task(struct e1000_adapter *adapter) if (link) { if (!netif_carrier_ok(netdev)) { + boolean_t txb2b = 1; e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); @@ -2418,25 +2593,77 @@ e1000_watchdog_task(struct e1000_adapter *adapter) adapter->link_duplex == FULL_DUPLEX ? "Full Duplex" : "Half Duplex"); - /* tweak tx_queue_len according to speed/duplex */ + /* tweak tx_queue_len according to speed/duplex + * and adjust the timeout factor */ netdev->tx_queue_len = adapter->tx_queue_len; adapter->tx_timeout_factor = 1; - if (adapter->link_duplex == HALF_DUPLEX) { + switch (adapter->link_speed) { + case SPEED_10: + txb2b = 0; + netdev->tx_queue_len = 10; + adapter->tx_timeout_factor = 8; + break; + case SPEED_100: + txb2b = 0; + netdev->tx_queue_len = 100; + /* maybe add some timeout factor ? 
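+ * (a value between the default of 1 and the 8 used at 10 Mb/s would be the natural range, but none has been validated here)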
*/ + break; + } + + if ((adapter->hw.mac_type == e1000_82571 || + adapter->hw.mac_type == e1000_82572) && + txb2b == 0) { + uint32_t tarc0; + tarc0 = E1000_READ_REG(&adapter->hw, TARC0); + tarc0 &= ~(1 << 21); + E1000_WRITE_REG(&adapter->hw, TARC0, tarc0); + } + +#ifdef NETIF_F_TSO + /* disable TSO for pcie and 10/100 speeds, to avoid + * some hardware issues */ + if (!adapter->tso_force && + adapter->hw.bus_type == e1000_bus_type_pci_express){ switch (adapter->link_speed) { case SPEED_10: - netdev->tx_queue_len = 10; - adapter->tx_timeout_factor = 8; - break; case SPEED_100: - netdev->tx_queue_len = 100; + DPRINTK(PROBE,INFO, + "10/100 speed: disabling TSO\n"); + netdev->features &= ~NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features &= ~NETIF_F_TSO6; +#endif + break; + case SPEED_1000: + netdev->features |= NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; +#endif + break; + default: + /* oops */ break; } } +#endif + + /* enable transmits in the hardware, need to do this + * after setting TARC0 */ + tctl = E1000_READ_REG(&adapter->hw, TCTL); + tctl |= E1000_TCTL_EN; + E1000_WRITE_REG(&adapter->hw, TCTL, tctl); netif_carrier_on(netdev); netif_wake_queue(netdev); mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); adapter->smartspeed = 0; + } else { + /* make sure the receive unit is started */ + if (adapter->hw.rx_needs_kicking) { + struct e1000_hw *hw = &adapter->hw; + uint32_t rctl = E1000_READ_REG(hw, RCTL); + E1000_WRITE_REG(hw, RCTL, rctl | E1000_RCTL_EN); + } } } else { if (netif_carrier_ok(netdev)) { @@ -2446,6 +2673,15 @@ e1000_watchdog_task(struct e1000_adapter *adapter) netif_carrier_off(netdev); netif_stop_queue(netdev); mod_timer(&adapter->phy_info_timer, jiffies + 2 * HZ); + + /* 80003ES2LAN workaround-- + * For packet buffer work-around on link down event; + * disable receives in the ISR and + * reset device here in the watchdog + */ + if (adapter->hw.mac_type == e1000_80003es2lan) + /* reset device */ + schedule_work(&adapter->reset_task); } e1000_smartspeed(adapter); @@ -2465,32 +2701,17 @@ e1000_watchdog_task(struct e1000_adapter *adapter) e1000_update_adaptive(&adapter->hw); -#ifdef CONFIG_E1000_MQ - txdr = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id()); -#endif if (!netif_carrier_ok(netdev)) { if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ - schedule_work(&adapter->tx_timeout_task); + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); } } - /* Dynamic mode for Interrupt Throttle Rate (ITR) */ - if (adapter->hw.mac_type >= e1000_82540 && adapter->itr == 1) { - /* Symmetric Tx/Rx gets a reduced ITR=2000; Total - * asymmetrical Tx or Rx gets ITR=8000; everyone - * else is between 2000-8000. */ - uint32_t goc = (adapter->gotcl + adapter->gorcl) / 10000; - uint32_t dif = (adapter->gotcl > adapter->gorcl ? - adapter->gotcl - adapter->gorcl : - adapter->gorcl - adapter->gotcl) / 10000; - uint32_t itr = goc > 0 ? 
(dif * 6000 / goc + 2000) : 8000; - E1000_WRITE_REG(&adapter->hw, ITR, 1000000000 / (itr * 256)); - } - /* Cause software interrupt to ensure rx ring is cleaned */ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0); @@ -2506,6 +2727,143 @@ e1000_watchdog_task(struct e1000_adapter *adapter) mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ); } +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +/** + * e1000_update_itr - update the dynamic ITR value based on statistics + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see e1000_param.c) + * @adapter: pointer to adapter + * @itr_setting: current adapter->itr + * @packets: the number of packets during this measurement interval + * @bytes: the number of bytes during this measurement interval + **/ +static unsigned int e1000_update_itr(struct e1000_adapter *adapter, + uint16_t itr_setting, + int packets, + int bytes) +{ + unsigned int retval = itr_setting; + struct e1000_hw *hw = &adapter->hw; + + if (unlikely(hw->mac_type < e1000_82540)) + goto update_itr_done; + + if (packets == 0) + goto update_itr_done; + + switch (itr_setting) { + case lowest_latency: + /* jumbo frames get bulk treatment*/ + if (bytes/packets > 8000) + retval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + retval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* jumbo frames need bulk latency setting */ + if (bytes/packets > 8000) + retval = bulk_latency; + else if ((packets < 10) || ((bytes/packets) > 1200)) + retval = bulk_latency; + else if ((packets > 35)) + retval = lowest_latency; + } else if (bytes/packets > 2000) + retval = bulk_latency; + else if (packets <= 2 && bytes < 512) + retval = lowest_latency; + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + retval = low_latency; + } else if (bytes < 6000) { + retval = low_latency; + } + break; + } + +update_itr_done: + return retval; +} + +static void e1000_set_itr(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + uint16_t current_itr; + uint32_t new_itr = adapter->itr; + + if (unlikely(hw->mac_type < e1000_82540)) + return; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + if (unlikely(adapter->link_speed != SPEED_1000)) { + current_itr = 0; + new_itr = 4000; + goto set_itr_now; + } + + adapter->tx_itr = e1000_update_itr(adapter, + adapter->tx_itr, + adapter->total_tx_packets, + adapter->total_tx_bytes); + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency) + adapter->tx_itr = low_latency; + + adapter->rx_itr = e1000_update_itr(adapter, + adapter->rx_itr, + adapter->total_rx_packets, + adapter->total_rx_bytes); + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency) + adapter->rx_itr = low_latency; + + current_itr = max(adapter->rx_itr, 
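+ /* the higher (bulkier) latency class wins, i.e. the lower interrupt rate */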
adapter->tx_itr); + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = 70000; + break; + case low_latency: + new_itr = 20000; /* aka hwitr = ~200 */ + break; + case bulk_latency: + new_itr = 4000; + break; + default: + break; + } + +set_itr_now: + if (new_itr != adapter->itr) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing */ + new_itr = new_itr > adapter->itr ? + min(adapter->itr + (new_itr >> 2), new_itr) : + new_itr; + adapter->itr = new_itr; + E1000_WRITE_REG(hw, ITR, 1000000000 / (new_itr * 256)); + } + + return; +} + #define E1000_TX_FLAGS_CSUM 0x00000001 #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 @@ -2513,7 +2871,7 @@ e1000_watchdog_task(struct e1000_adapter *adapter) #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 -static inline int +static int e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, struct sk_buff *skb) { @@ -2526,7 +2884,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, uint8_t ipcss, ipcso, tucss, tucso, hdr_len; int err; - if (skb_shinfo(skb)->tso_size) { + if (skb_is_gso(skb)) { if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (err) @@ -2534,8 +2892,8 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, } hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - mss = skb_shinfo(skb)->tso_size; - if (skb->protocol == ntohs(ETH_P_IP)) { + mss = skb_shinfo(skb)->gso_size; + if (skb->protocol == htons(ETH_P_IP)) { skb->nh.iph->tot_len = 0; skb->nh.iph->check = 0; skb->h.th->check = @@ -2546,8 +2904,8 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, 0); cmd_length = E1000_TXD_CMD_IP; ipcse = skb->h.raw - skb->data - 1; -#ifdef NETIF_F_TSO_IPV6 - } else if (skb->protocol == ntohs(ETH_P_IPV6)) { +#ifdef NETIF_F_TSO6 + } else if (skb->protocol == htons(ETH_P_IPV6)) { skb->nh.ipv6h->payload_len = 0; skb->h.th->check = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, @@ -2582,6 +2940,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc->cmd_and_length = cpu_to_le32(cmd_length); buffer_info->time_stamp = jiffies; + buffer_info->next_to_watch = i; if (++i == tx_ring->count) i = 0; tx_ring->next_to_use = i; @@ -2593,7 +2952,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, return FALSE; } -static inline boolean_t +static boolean_t e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, struct sk_buff *skb) { @@ -2602,7 +2961,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, unsigned int i; uint8_t css; - if (likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { css = skb->h.raw - skb->data; i = tx_ring->next_to_use; @@ -2610,12 +2969,13 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc = E1000_CONTEXT_DESC(*tx_ring, i); context_desc->upper_setup.tcp_fields.tucss = css; - context_desc->upper_setup.tcp_fields.tucso = css + skb->csum; + context_desc->upper_setup.tcp_fields.tucso = css + skb->csum_offset; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); buffer_info->time_stamp = jiffies; + buffer_info->next_to_watch = i; if (unlikely(++i == 
tx_ring->count)) i = 0; tx_ring->next_to_use = i; @@ -2629,7 +2989,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, #define E1000_MAX_TXD_PWR 12 #define E1000_MAX_DATA_PER_TXD (1<data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) { + !skb_is_gso(skb)) { tx_ring->last_tx_tso = 0; size -= 4; } @@ -2684,6 +3044,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, size, PCI_DMA_TODEVICE); buffer_info->time_stamp = jiffies; + buffer_info->next_to_watch = i; len -= size; offset += size; @@ -2723,6 +3084,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, size, PCI_DMA_TODEVICE); buffer_info->time_stamp = jiffies; + buffer_info->next_to_watch = i; len -= size; offset += size; @@ -2738,7 +3100,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, return count; } -static inline void +static void e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, int tx_flags, int count) { @@ -2788,6 +3150,9 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, tx_ring->next_to_use = i; writel(i, adapter->hw.hw_addr + tx_ring->tdt); + /* we need this if more than one processor can write to our tail + * at a time, it syncronizes IO on IA64/Altix systems */ + mmiowb(); } /** @@ -2802,7 +3167,7 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, #define E1000_FIFO_HDR 0x10 #define E1000_82547_PAD_LEN 0x3E0 -static inline int +static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct sk_buff *skb) { uint32_t fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; @@ -2829,7 +3194,7 @@ no_fifo_stall_required: } #define MINIMUM_DHCP_PACKET_SIZE 282 -static inline int +static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) { struct e1000_hw *hw = &adapter->hw; @@ -2840,7 +3205,7 @@ e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) ) return 0; } - if ((skb->len > MINIMUM_DHCP_PACKET_SIZE) && (!skb->protocol)) { + if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { struct ethhdr *eth = (struct ethhdr *) skb->data; if ((htons(ETH_P_IP) == eth->h_proto)) { const struct iphdr *ip = @@ -2863,6 +3228,36 @@ e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct sk_buff *skb) return 0; } +static int __e1000_maybe_stop_tx(struct net_device *netdev, int size) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_tx_ring *tx_ring = adapter->tx_ring; + + netif_stop_queue(netdev); + /* Herbert's original patch had: + * smp_mb__after_netif_stop_queue(); + * but since that doesn't exist yet, just open code it. */ + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. */ + if (likely(E1000_DESC_UNUSED(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! 
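+ * -- another CPU freed enough descriptors between netif_stop_queue() and the re-check, so restart the queue instead of returning -EBUSY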
*/ + netif_start_queue(netdev); + ++adapter->restart_queue; + return 0; +} + +static int e1000_maybe_stop_tx(struct net_device *netdev, + struct e1000_tx_ring *tx_ring, int size) +{ + if (likely(E1000_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __e1000_maybe_stop_tx(netdev, size); +} + #define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) @@ -2877,24 +3272,29 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) unsigned int nr_frags = 0; unsigned int mss = 0; int count = 0; - int tso; + int tso; unsigned int f; len -= skb->data_len; -#ifdef CONFIG_E1000_MQ - tx_ring = *per_cpu_ptr(adapter->cpu_tx_ring, smp_processor_id()); -#else + /* This goes back to the question of how to logically map a tx queue + * to a flow. Right now, performance is impacted slightly negatively + * if using multiple tx queues. If the stack breaks away from a + * single qdisc implementation, we can look at this again. */ tx_ring = adapter->tx_ring; -#endif if (unlikely(skb->len <= 0)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* 82571 and newer doesn't need the workaround that limited descriptor + * length to 4kB */ + if (adapter->hw.mac_type >= e1000_82571) + max_per_txd = 8192; + #ifdef NETIF_F_TSO - mss = skb_shinfo(skb)->tso_size; - /* The controller does a simple calculation to + mss = skb_shinfo(skb)->gso_size; + /* The controller does a simple calculation to * make sure there is enough room in the FIFO before * initiating the DMA for each buffer. The calc is: * 4 = ceil(buffer len/mss). To make sure we don't @@ -2905,37 +3305,55 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) max_per_txd = min(mss << 2, max_per_txd); max_txd_pwr = fls(max_per_txd) - 1; - /* TSO Workaround for 82571/2 Controllers -- if skb->data - * points to just header, pull a few bytes of payload from - * frags into skb->data */ + /* TSO Workaround for 82571/2/3 Controllers -- if skb->data + * points to just header, pull a few bytes of payload from + * frags into skb->data */ hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - if (skb->data_len && (hdr_len == (skb->len - skb->data_len)) && - (adapter->hw.mac_type == e1000_82571 || - adapter->hw.mac_type == e1000_82572)) { - unsigned int pull_size; - pull_size = min((unsigned int)4, skb->data_len); - if (!__pskb_pull_tail(skb, pull_size)) { - printk(KERN_ERR "__pskb_pull_tail failed.\n"); - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { + switch (adapter->hw.mac_type) { + unsigned int pull_size; + case e1000_82544: + /* Make sure we have room to chop off 4 bytes, + * and that the end alignment will work out to + * this hardware's requirements + * NOTE: this is a TSO only workaround + * if end byte alignment not correct move us + * into the next dword */ + if ((unsigned long)(skb->tail - 1) & 4) + break; + /* fall through */ + case e1000_82571: + case e1000_82572: + case e1000_82573: + case e1000_ich8lan: + pull_size = min((unsigned int)4, skb->data_len); + if (!__pskb_pull_tail(skb, pull_size)) { + DPRINTK(DRV, ERR, + "__pskb_pull_tail failed.\n"); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + len = skb->len - skb->data_len; + break; + default: + /* do nothing */ + break; } - len = skb->len - skb->data_len; } } /* reserve a descriptor for the offload context */ - if ((mss) || (skb->ip_summed == CHECKSUM_HW)) + if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) count++; count++; #else - 
if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) count++; #endif #ifdef NETIF_F_TSO /* Controller Erratum workaround */ - if (!skb->data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) + if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb)) count++; #endif @@ -2958,7 +3376,9 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (adapter->pcix_82544) count += nr_frags; - if (adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) + + if (adapter->hw.tx_pkt_filtering && + (adapter->hw.mac_type == e1000_82573)) e1000_transfer_dhcp_info(adapter, skb); local_irq_save(flags); @@ -2970,8 +3390,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* need: count + 2 desc gap to keep tail from touching * head, otherwise try next time */ - if (unlikely(E1000_DESC_UNUSED(tx_ring) < count + 2)) { - netif_stop_queue(netdev); + if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) { spin_unlock_irqrestore(&tx_ring->tx_lock, flags); return NETDEV_TX_BUSY; } @@ -2979,7 +3398,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (unlikely(adapter->hw.mac_type == e1000_82547)) { if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { netif_stop_queue(netdev); - mod_timer(&adapter->tx_fifo_stall_timer, jiffies); + mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1); spin_unlock_irqrestore(&tx_ring->tx_lock, flags); return NETDEV_TX_BUSY; } @@ -3008,7 +3427,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* Old method was to assume IPv4 packet by default if TSO was enabled. * 82571 hardware supports TSO capabilities for IPv6 as well... * no longer assume, we must. */ - if (likely(skb->protocol == ntohs(ETH_P_IP))) + if (likely(skb->protocol == htons(ETH_P_IP))) tx_flags |= E1000_TX_FLAGS_IPV4; e1000_tx_queue(adapter, tx_ring, tx_flags, @@ -3018,8 +3437,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) netdev->trans_start = jiffies; /* Make sure there is space in the ring for the next send. */ - if (unlikely(E1000_DESC_UNUSED(tx_ring) < MAX_SKB_FRAGS + 2)) - netif_stop_queue(netdev); + e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); spin_unlock_irqrestore(&tx_ring->tx_lock, flags); return NETDEV_TX_OK; @@ -3036,17 +3454,17 @@ e1000_tx_timeout(struct net_device *netdev) struct e1000_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ - schedule_work(&adapter->tx_timeout_task); + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); } static void -e1000_tx_timeout_task(struct net_device *netdev) +e1000_reset_task(struct work_struct *work) { - struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_adapter *adapter = + container_of(work, struct e1000_adapter, reset_task); - adapter->tx_timeout_count++; - e1000_down(adapter); - e1000_up(adapter); + e1000_reinit_locked(adapter); } /** @@ -3079,6 +3497,7 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; + uint16_t eeprom_data = 0; if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { @@ -3088,16 +3507,34 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) /* Adapter-specific max frame size limits. */ switch (adapter->hw.mac_type) { - case e1000_82542_rev2_0: - case e1000_82542_rev2_1: - case e1000_82573: + case e1000_undefined ... 
e1000_82542_rev2_1: + case e1000_ich8lan: if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); return -EINVAL; } break; + case e1000_82573: + /* Jumbo Frames not supported if: + * - this is not an 82573L device + * - ASPM is enabled in any way (0x1A bits 3:2) */ + e1000_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1, + &eeprom_data); + if ((adapter->hw.device_id != E1000_DEV_ID_82573L) || + (eeprom_data & EEPROM_WORD1A_ASPM_MASK)) { + if (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE) { + DPRINTK(PROBE, ERR, + "Jumbo Frames not supported.\n"); + return -EINVAL; + } + break; + } + /* ERT will be enabled later to enable wire speed receives */ + + /* fall through to get support */ case e1000_82571: case e1000_82572: + case e1000_80003es2lan: #define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { DPRINTK(PROBE, ERR, "MTU > 9216 not supported.\n"); @@ -3109,37 +3546,38 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) break; } - - if (adapter->hw.mac_type > e1000_82547_rev_2) { - adapter->rx_buffer_len = max_frame; - E1000_ROUNDUP(adapter->rx_buffer_len, 1024); - } else { - if(unlikely((adapter->hw.mac_type < e1000_82543) && - (max_frame > MAXIMUM_ETHERNET_FRAME_SIZE))) { - DPRINTK(PROBE, ERR, "Jumbo Frames not supported " - "on 82542\n"); - return -EINVAL; - } else { - if(max_frame <= E1000_RXBUFFER_2048) - adapter->rx_buffer_len = E1000_RXBUFFER_2048; - else if(max_frame <= E1000_RXBUFFER_4096) - adapter->rx_buffer_len = E1000_RXBUFFER_4096; - else if(max_frame <= E1000_RXBUFFER_8192) - adapter->rx_buffer_len = E1000_RXBUFFER_8192; - else if(max_frame <= E1000_RXBUFFER_16384) - adapter->rx_buffer_len = E1000_RXBUFFER_16384; - } - } + /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN + * means we reserve 2 more, this pushes us to allocate from the next + * larger slab size + * i.e. RXBUFFER_2048 --> size-4096 slab */ + + if (max_frame <= E1000_RXBUFFER_256) + adapter->rx_buffer_len = E1000_RXBUFFER_256; + else if (max_frame <= E1000_RXBUFFER_512) + adapter->rx_buffer_len = E1000_RXBUFFER_512; + else if (max_frame <= E1000_RXBUFFER_1024) + adapter->rx_buffer_len = E1000_RXBUFFER_1024; + else if (max_frame <= E1000_RXBUFFER_2048) + adapter->rx_buffer_len = E1000_RXBUFFER_2048; + else if (max_frame <= E1000_RXBUFFER_4096) + adapter->rx_buffer_len = E1000_RXBUFFER_4096; + else if (max_frame <= E1000_RXBUFFER_8192) + adapter->rx_buffer_len = E1000_RXBUFFER_8192; + else if (max_frame <= E1000_RXBUFFER_16384) + adapter->rx_buffer_len = E1000_RXBUFFER_16384; + + /* adjust allocation if LPE protects us, and we aren't using SBP */ + if (!adapter->hw.tbi_compatibility_on && + ((max_frame == MAXIMUM_ETHERNET_FRAME_SIZE) || + (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE))) + adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; netdev->mtu = new_mtu; - - if (netif_running(netdev)) { - e1000_down(adapter); - e1000_up(adapter); - } - adapter->hw.max_frame_size = max_frame; + if (netif_running(netdev)) + e1000_reinit_locked(adapter); + return 0; } @@ -3152,11 +3590,21 @@ void e1000_update_stats(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; unsigned long flags; uint16_t phy_tmp; #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF + /* + * Prevent stats update while adapter is being reset, or if the pci + * connection is down. 
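+ * (MMIO reads return all-ones while the PCI channel is down, which would corrupt the accumulated counters)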
+ */ + if (adapter->link_speed == 0) + return; + if (pdev->error_state && pdev->error_state != pci_channel_io_normal) + return; + spin_lock_irqsave(&adapter->stats_lock, flags); /* these counters are modified from e1000_adjust_tbi_stats, @@ -3171,12 +3619,15 @@ e1000_update_stats(struct e1000_adapter *adapter) adapter->stats.bprc += E1000_READ_REG(hw, BPRC); adapter->stats.mprc += E1000_READ_REG(hw, MPRC); adapter->stats.roc += E1000_READ_REG(hw, ROC); - adapter->stats.prc64 += E1000_READ_REG(hw, PRC64); - adapter->stats.prc127 += E1000_READ_REG(hw, PRC127); - adapter->stats.prc255 += E1000_READ_REG(hw, PRC255); - adapter->stats.prc511 += E1000_READ_REG(hw, PRC511); - adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023); - adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522); + + if (adapter->hw.mac_type != e1000_ich8lan) { + adapter->stats.prc64 += E1000_READ_REG(hw, PRC64); + adapter->stats.prc127 += E1000_READ_REG(hw, PRC127); + adapter->stats.prc255 += E1000_READ_REG(hw, PRC255); + adapter->stats.prc511 += E1000_READ_REG(hw, PRC511); + adapter->stats.prc1023 += E1000_READ_REG(hw, PRC1023); + adapter->stats.prc1522 += E1000_READ_REG(hw, PRC1522); + } adapter->stats.symerrs += E1000_READ_REG(hw, SYMERRS); adapter->stats.mpc += E1000_READ_REG(hw, MPC); @@ -3204,12 +3655,16 @@ e1000_update_stats(struct e1000_adapter *adapter) adapter->stats.totl += E1000_READ_REG(hw, TOTL); adapter->stats.toth += E1000_READ_REG(hw, TOTH); adapter->stats.tpr += E1000_READ_REG(hw, TPR); - adapter->stats.ptc64 += E1000_READ_REG(hw, PTC64); - adapter->stats.ptc127 += E1000_READ_REG(hw, PTC127); - adapter->stats.ptc255 += E1000_READ_REG(hw, PTC255); - adapter->stats.ptc511 += E1000_READ_REG(hw, PTC511); - adapter->stats.ptc1023 += E1000_READ_REG(hw, PTC1023); - adapter->stats.ptc1522 += E1000_READ_REG(hw, PTC1522); + + if (adapter->hw.mac_type != e1000_ich8lan) { + adapter->stats.ptc64 += E1000_READ_REG(hw, PTC64); + adapter->stats.ptc127 += E1000_READ_REG(hw, PTC127); + adapter->stats.ptc255 += E1000_READ_REG(hw, PTC255); + adapter->stats.ptc511 += E1000_READ_REG(hw, PTC511); + adapter->stats.ptc1023 += E1000_READ_REG(hw, PTC1023); + adapter->stats.ptc1522 += E1000_READ_REG(hw, PTC1522); + } + adapter->stats.mptc += E1000_READ_REG(hw, MPTC); adapter->stats.bptc += E1000_READ_REG(hw, BPTC); @@ -3231,17 +3686,19 @@ e1000_update_stats(struct e1000_adapter *adapter) if (hw->mac_type > e1000_82547_rev_2) { adapter->stats.iac += E1000_READ_REG(hw, IAC); adapter->stats.icrxoc += E1000_READ_REG(hw, ICRXOC); - adapter->stats.icrxptc += E1000_READ_REG(hw, ICRXPTC); - adapter->stats.icrxatc += E1000_READ_REG(hw, ICRXATC); - adapter->stats.ictxptc += E1000_READ_REG(hw, ICTXPTC); - adapter->stats.ictxatc += E1000_READ_REG(hw, ICTXATC); - adapter->stats.ictxqec += E1000_READ_REG(hw, ICTXQEC); - adapter->stats.ictxqmtc += E1000_READ_REG(hw, ICTXQMTC); - adapter->stats.icrxdmtc += E1000_READ_REG(hw, ICRXDMTC); + + if (adapter->hw.mac_type != e1000_ich8lan) { + adapter->stats.icrxptc += E1000_READ_REG(hw, ICRXPTC); + adapter->stats.icrxatc += E1000_READ_REG(hw, ICRXATC); + adapter->stats.ictxptc += E1000_READ_REG(hw, ICTXPTC); + adapter->stats.ictxatc += E1000_READ_REG(hw, ICTXATC); + adapter->stats.ictxqec += E1000_READ_REG(hw, ICTXQEC); + adapter->stats.ictxqmtc += E1000_READ_REG(hw, ICTXQMTC); + adapter->stats.icrxdmtc += E1000_READ_REG(hw, ICRXDMTC); + } } /* Fill out the OS statistics structure */ - adapter->net_stats.rx_packets = adapter->stats.gprc; adapter->net_stats.tx_packets = adapter->stats.gptc; 
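/* gprc/gptc count good packets; gorcl/gotcl are the low 32 bits of the good-octets counters */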
adapter->net_stats.rx_bytes = adapter->stats.gorcl; @@ -3251,27 +3708,33 @@ e1000_update_stats(struct e1000_adapter *adapter) /* Rx Errors */ + /* RLEC on some newer hardware can be incorrect so build + * our own version based on RUC and ROC */ adapter->net_stats.rx_errors = adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + - adapter->stats.rlec + adapter->stats.cexterr; - adapter->net_stats.rx_dropped = 0; - adapter->net_stats.rx_length_errors = adapter->stats.rlec; + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.cexterr; + adapter->stats.rlerrc = adapter->stats.ruc + adapter->stats.roc; + adapter->net_stats.rx_length_errors = adapter->stats.rlerrc; adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs; adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc; adapter->net_stats.rx_missed_errors = adapter->stats.mpc; /* Tx Errors */ - - adapter->net_stats.tx_errors = adapter->stats.ecol + - adapter->stats.latecol; + adapter->stats.txerrc = adapter->stats.ecol + adapter->stats.latecol; + adapter->net_stats.tx_errors = adapter->stats.txerrc; adapter->net_stats.tx_aborted_errors = adapter->stats.ecol; adapter->net_stats.tx_window_errors = adapter->stats.latecol; adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs; + if (adapter->hw.bad_tx_carr_stats_fd && + adapter->link_duplex == FULL_DUPLEX) { + adapter->net_stats.tx_carrier_errors = 0; + adapter->stats.tncrs = 0; + } /* Tx Dropped needs to be maintained elsewhere */ /* Phy Stats */ - if (hw->media_type == e1000_media_type_copper) { if ((adapter->link_speed == SPEED_1000) && (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { @@ -3285,29 +3748,102 @@ e1000_update_stats(struct e1000_adapter *adapter) adapter->phy_stats.receive_errors += phy_tmp; } + /* Management Stats */ + if (adapter->hw.has_smbus) { + adapter->stats.mgptc += E1000_READ_REG(hw, MGTPTC); + adapter->stats.mgprc += E1000_READ_REG(hw, MGTPRC); + adapter->stats.mgpdc += E1000_READ_REG(hw, MGTPDC); + } + spin_unlock_irqrestore(&adapter->stats_lock, flags); } +#ifdef CONFIG_PCI_MSI -#ifdef CONFIG_E1000_MQ -void -e1000_rx_schedule(void *data) +/** + * e1000_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ + +static +irqreturn_t e1000_intr_msi(int irq, void *data) { - struct net_device *poll_dev, *netdev = data; - struct e1000_adapter *adapter = netdev->priv; - int this_cpu = get_cpu(); + struct net_device *netdev = data; + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +#ifndef CONFIG_E1000_NAPI + int i; +#endif - poll_dev = *per_cpu_ptr(adapter->cpu_netdev, this_cpu); - if (poll_dev == NULL) { - put_cpu(); - return; + /* this code avoids the read of ICR but has to get 1000 interrupts + * at every link change event before it will notice the change */ + if (++adapter->detect_link >= 1000) { + uint32_t icr = E1000_READ_REG(hw, ICR); +#ifdef CONFIG_E1000_NAPI + /* read ICR disables interrupts using IAM, so keep up with our + * enable/disable accounting */ + atomic_inc(&adapter->irq_sem); +#endif + adapter->detect_link = 0; + if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) && + (icr & E1000_ICR_INT_ASSERTED)) { + hw->get_link_status = 1; + /* 80003ES2LAN workaround-- + * For packet buffer work-around on link down event; + * disable receives here in the ISR and + * reset adapter in watchdog + */ + if (netif_carrier_ok(netdev) && + (adapter->hw.mac_type == e1000_80003es2lan)) { + /* disable receives */ + uint32_t 
rctl = E1000_READ_REG(hw, RCTL); + E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); + } + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, &adapter->flags)) + mod_timer(&adapter->watchdog_timer, + jiffies + 1); + } + } else { + E1000_WRITE_REG(hw, ICR, (0xffffffff & ~(E1000_ICR_RXSEQ | + E1000_ICR_LSC))); + /* bummer we have to flush here, but things break otherwise as + * some event appears to be lost or delayed and throughput + * drops. In almost all tests this flush is un-necessary */ + E1000_WRITE_FLUSH(hw); +#ifdef CONFIG_E1000_NAPI + /* Interrupt Auto-Mask (IAM)...upon writing ICR, interrupts are + * masked. No need for the IMC write, but it does mean we + * should account for it ASAP. */ + atomic_inc(&adapter->irq_sem); +#endif } - if (likely(netif_rx_schedule_prep(poll_dev))) - __netif_rx_schedule(poll_dev); - else +#ifdef CONFIG_E1000_NAPI + if (likely(netif_rx_schedule_prep(netdev))) { + adapter->total_tx_bytes = 0; + adapter->total_tx_packets = 0; + adapter->total_rx_bytes = 0; + adapter->total_rx_packets = 0; + __netif_rx_schedule(netdev); + } else e1000_irq_enable(adapter); +#else + adapter->total_tx_bytes = 0; + adapter->total_rx_bytes = 0; + adapter->total_tx_packets = 0; + adapter->total_rx_packets = 0; + + for (i = 0; i < E1000_MAX_INTR; i++) + if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & + !e1000_clean_tx_irq(adapter, adapter->tx_ring))) + break; - put_cpu(); + if (likely(adapter->itr_setting & 3)) + e1000_set_itr(adapter); +#endif + + return IRQ_HANDLED; } #endif @@ -3315,19 +3851,28 @@ e1000_rx_schedule(void *data) * e1000_intr - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure - * @pt_regs: CPU registers structure **/ static irqreturn_t -e1000_intr(int irq, void *data, struct pt_regs *regs) +e1000_intr(int irq, void *data) { struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - uint32_t icr = E1000_READ_REG(hw, ICR); + uint32_t rctl, icr = E1000_READ_REG(hw, ICR); #ifndef CONFIG_E1000_NAPI int i; -#else +#endif + if (unlikely(!icr)) + return IRQ_NONE; /* Not our interrupt */ + +#ifdef CONFIG_E1000_NAPI + /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is + * not set, then the adapter didn't send an interrupt */ + if (unlikely(hw->mac_type >= e1000_82571 && + !(icr & E1000_ICR_INT_ASSERTED))) + return IRQ_NONE; + /* Interrupt Auto-Mask...upon reading ICR, * interrupts are masked. 
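* (IAM was loaded with 0xffffffff in e1000_configure_rx, so the ICR read itself re-masks in hardware.)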
No need for the * IMC write, but it does mean we should @@ -3336,45 +3881,42 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) atomic_inc(&adapter->irq_sem); #endif - if (unlikely(!icr)) { -#ifdef CONFIG_E1000_NAPI - if (hw->mac_type >= e1000_82571) - e1000_irq_enable(adapter); -#endif - return IRQ_NONE; /* Not our interrupt */ - } - if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { hw->get_link_status = 1; - mod_timer(&adapter->watchdog_timer, jiffies); + /* 80003ES2LAN workaround-- + * For packet buffer work-around on link down event; + * disable receives here in the ISR and + * reset adapter in watchdog + */ + if (netif_carrier_ok(netdev) && + (adapter->hw.mac_type == e1000_80003es2lan)) { + /* disable receives */ + rctl = E1000_READ_REG(hw, RCTL); + E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); + } + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, &adapter->flags)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); } #ifdef CONFIG_E1000_NAPI if (unlikely(hw->mac_type < e1000_82571)) { + /* disable interrupts, without the synchronize_irq bit */ atomic_inc(&adapter->irq_sem); E1000_WRITE_REG(hw, IMC, ~0); E1000_WRITE_FLUSH(hw); } -#ifdef CONFIG_E1000_MQ - if (atomic_read(&adapter->rx_sched_call_data.count) == 0) { - /* We must setup the cpumask once count == 0 since - * each cpu bit is cleared when the work is done. */ - adapter->rx_sched_call_data.cpumask = adapter->cpumask; - atomic_add(adapter->num_rx_queues - 1, &adapter->irq_sem); - atomic_set(&adapter->rx_sched_call_data.count, - adapter->num_rx_queues); - smp_call_async_mask(&adapter->rx_sched_call_data); - } else { - printk("call_data.count == %u\n", atomic_read(&adapter->rx_sched_call_data.count)); - } -#else /* if !CONFIG_E1000_MQ */ - if (likely(netif_rx_schedule_prep(&adapter->polling_netdev[0]))) - __netif_rx_schedule(&adapter->polling_netdev[0]); - else + if (likely(netif_rx_schedule_prep(netdev))) { + adapter->total_tx_bytes = 0; + adapter->total_tx_packets = 0; + adapter->total_rx_bytes = 0; + adapter->total_rx_packets = 0; + __netif_rx_schedule(netdev); + } else + /* this really should not happen! if it does it is basically a + * bug, but not a hard error, so enable ints and continue */ e1000_irq_enable(adapter); -#endif /* CONFIG_E1000_MQ */ - -#else /* if !CONFIG_E1000_NAPI */ +#else /* Writing IMC and IMS is needed for 82547. * Due to Hub Link bus being occupied, an interrupt * de-assertion message is not able to be sent. @@ -3390,16 +3932,23 @@ e1000_intr(int irq, void *data, struct pt_regs *regs) E1000_WRITE_REG(hw, IMC, ~0); } + adapter->total_tx_bytes = 0; + adapter->total_rx_bytes = 0; + adapter->total_tx_packets = 0; + adapter->total_rx_packets = 0; + for (i = 0; i < E1000_MAX_INTR; i++) if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & !e1000_clean_tx_irq(adapter, adapter->tx_ring))) break; + if (likely(adapter->itr_setting & 3)) + e1000_set_itr(adapter); + if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) e1000_irq_enable(adapter); -#endif /* CONFIG_E1000_NAPI */ - +#endif return IRQ_HANDLED; } @@ -3414,35 +3963,26 @@ e1000_clean(struct net_device *poll_dev, int *budget) { struct e1000_adapter *adapter; int work_to_do = min(*budget, poll_dev->quota); - int tx_cleaned = 0, i = 0, work_done = 0; + int tx_cleaned = 0, work_done = 0; /* Must NOT use netdev_priv macro here. 
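* poll_dev->priv is assigned directly and is not guaranteed to sit at the netdev_priv() offset.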
*/ adapter = poll_dev->priv; /* Keep link state information with original netdev */ - if (!netif_carrier_ok(adapter->netdev)) + if (!netif_carrier_ok(poll_dev)) goto quit_polling; - while (poll_dev != &adapter->polling_netdev[i]) { - i++; - if (unlikely(i == adapter->num_rx_queues)) - BUG(); - } - - if (likely(adapter->num_tx_queues == 1)) { - /* e1000_clean is called per-cpu. This lock protects - * tx_ring[0] from being cleaned by multiple cpus - * simultaneously. A failure obtaining the lock means - * tx_ring[0] is currently being cleaned anyway. */ - if (spin_trylock(&adapter->tx_queue_lock)) { - tx_cleaned = e1000_clean_tx_irq(adapter, - &adapter->tx_ring[0]); - spin_unlock(&adapter->tx_queue_lock); - } - } else - tx_cleaned = e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]); + /* e1000_clean is called per-cpu. This lock protects + * tx_ring[0] from being cleaned by multiple cpus + * simultaneously. A failure obtaining the lock means + * tx_ring[0] is currently being cleaned anyway. */ + if (spin_trylock(&adapter->tx_queue_lock)) { + tx_cleaned = e1000_clean_tx_irq(adapter, + &adapter->tx_ring[0]); + spin_unlock(&adapter->tx_queue_lock); + } - adapter->clean_rx(adapter, &adapter->rx_ring[i], + adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, work_to_do); *budget -= work_done; @@ -3450,8 +3990,10 @@ e1000_clean(struct net_device *poll_dev, int *budget) /* If no Tx and not enough Rx work done, exit the polling mode */ if ((!tx_cleaned && (work_done == 0)) || - !netif_running(adapter->netdev)) { + !netif_running(poll_dev)) { quit_polling: + if (likely(adapter->itr_setting & 3)) + e1000_set_itr(adapter); netif_rx_complete(poll_dev); e1000_irq_enable(adapter); return 0; @@ -3474,7 +4016,11 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_desc *tx_desc, *eop_desc; struct e1000_buffer *buffer_info; unsigned int i, eop; +#ifdef CONFIG_E1000_NAPI + unsigned int count = 0; +#endif boolean_t cleaned = FALSE; + unsigned int total_tx_bytes=0, total_tx_packets=0; i = tx_ring->next_to_clean; eop = tx_ring->buffer_info[i].next_to_watch; @@ -3486,32 +4032,42 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, buffer_info = &tx_ring->buffer_info[i]; cleaned = (i == eop); -#ifdef CONFIG_E1000_MQ - tx_ring->tx_stats.bytes += buffer_info->length; -#endif + if (cleaned) { + struct sk_buff *skb = buffer_info->skb; + unsigned int segs = skb_shinfo(skb)->gso_segs; + total_tx_packets += segs; + total_tx_packets++; + total_tx_bytes += skb->len; + } e1000_unmap_and_free_tx_resource(adapter, buffer_info); - memset(tx_desc, 0, sizeof(struct e1000_tx_desc)); + tx_desc->upper.data = 0; if (unlikely(++i == tx_ring->count)) i = 0; } -#ifdef CONFIG_E1000_MQ - tx_ring->tx_stats.packets++; -#endif - eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); +#ifdef CONFIG_E1000_NAPI +#define E1000_TX_WEIGHT 64 + /* weight of a sort for tx, to avoid endless transmit cleanup */ + if (count++ == E1000_TX_WEIGHT) break; +#endif } tx_ring->next_to_clean = i; - spin_lock(&tx_ring->tx_lock); - - if (unlikely(cleaned && netif_queue_stopped(netdev) && - netif_carrier_ok(netdev))) - netif_wake_queue(netdev); - - spin_unlock(&tx_ring->tx_lock); +#define TX_WAKE_THRESHOLD 32 + if (unlikely(cleaned && netif_carrier_ok(netdev) && + E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. 
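+ * (This pairs with the smp_mb() in __e1000_maybe_stop_tx(), which re-checks for room after stopping.)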
+ */ + smp_mb(); + if (netif_queue_stopped(netdev)) { + netif_wake_queue(netdev); + ++adapter->restart_queue; + } + } if (adapter->detect_tx_hung) { /* Detect a transmit hang in hardware, this serializes the @@ -3519,7 +4075,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, adapter->detect_tx_hung = FALSE; if (tx_ring->buffer_info[eop].dma && time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + - adapter->tx_timeout_factor * HZ) + (adapter->tx_timeout_factor * HZ)) && !(E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF)) { @@ -3548,6 +4104,8 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, netif_stop_queue(netdev); } } + adapter->total_tx_bytes += total_tx_bytes; + adapter->total_tx_packets += total_tx_packets; return cleaned; } @@ -3559,7 +4117,7 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter, * @sk_buff: socket buffer with received data **/ -static inline void +static void e1000_rx_checksum(struct e1000_adapter *adapter, uint32_t status_err, uint32_t csum, struct sk_buff *skb) @@ -3597,7 +4155,7 @@ e1000_rx_checksum(struct e1000_adapter *adapter, */ csum = ntohl(csum ^ 0xFFFF); skb->csum = csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } adapter->hw_csum_good++; } @@ -3627,14 +4185,16 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, unsigned int i; int cleaned_count = 0; boolean_t cleaned = FALSE; + unsigned int total_rx_bytes=0, total_rx_packets=0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC(*rx_ring, i); buffer_info = &rx_ring->buffer_info[i]; while (rx_desc->status & E1000_RXD_STAT_DD) { - struct sk_buff *skb, *next_skb; + struct sk_buff *skb; u8 status; + #ifdef CONFIG_E1000_NAPI if (*work_done >= work_to_do) break; @@ -3644,10 +4204,13 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, skb = buffer_info->skb; buffer_info->skb = NULL; + prefetch(skb->data - NET_IP_ALIGN); + if (++i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC(*rx_ring, i); + prefetch(next_rxd); + next_buffer = &rx_ring->buffer_info[i]; - next_skb = next_buffer->skb; cleaned = TRUE; cleaned_count++; @@ -3662,7 +4225,8 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, /* All receives must fit into a single buffer */ E1000_DBG("%s: Receive packet consumed multiple" " buffers\n", netdev->name); - dev_kfree_skb_irq(skb); + /* recycle */ + buffer_info->skb = skb; goto next_desc; } @@ -3678,33 +4242,39 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, flags); length--; } else { - dev_kfree_skb_irq(skb); + /* recycle */ + buffer_info->skb = skb; goto next_desc; } } + /* adjust length to remove Ethernet CRC, this must be + * done after the TBI_ACCEPT workaround above */ + length -= 4; + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += length; + total_rx_packets++; + /* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ -#define E1000_CB_LENGTH 256 - if (length < E1000_CB_LENGTH) { + if (length < copybreak) { struct sk_buff *new_skb = - dev_alloc_skb(length + NET_IP_ALIGN); + netdev_alloc_skb(netdev, length + NET_IP_ALIGN); if (new_skb) { skb_reserve(new_skb, NET_IP_ALIGN); - new_skb->dev = netdev; memcpy(new_skb->data - NET_IP_ALIGN, skb->data - NET_IP_ALIGN, length + NET_IP_ALIGN); /* save the skb in buffer_info as good */ buffer_info->skb = skb; skb = new_skb; - skb_put(skb, length); } - } else - skb_put(skb, length); - + /* else just continue with the old one */ + } /* end copybreak code */ + skb_put(skb, length); /* Receive 
Checksum Offload */ e1000_rx_checksum(adapter, @@ -3733,10 +4303,6 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; -#ifdef CONFIG_E1000_MQ - rx_ring->rx_stats.packets++; - rx_ring->rx_stats.bytes += length; -#endif next_desc: rx_desc->status = 0; @@ -3747,6 +4313,7 @@ next_desc: cleaned_count = 0; } + /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; } @@ -3756,6 +4323,8 @@ next_desc: if (cleaned_count) adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); + adapter->total_rx_packets += total_rx_packets; + adapter->total_rx_bytes += total_rx_bytes; return cleaned; } @@ -3780,11 +4349,12 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, struct e1000_buffer *buffer_info, *next_buffer; struct e1000_ps_page *ps_page; struct e1000_ps_page_dma *ps_page_dma; - struct sk_buff *skb, *next_skb; + struct sk_buff *skb; unsigned int i, j; uint32_t length, staterr; int cleaned_count = 0; boolean_t cleaned = FALSE; + unsigned int total_rx_bytes=0, total_rx_packets=0; i = rx_ring->next_to_clean; rx_desc = E1000_RX_DESC_PS(*rx_ring, i); @@ -3801,10 +4371,14 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, #endif skb = buffer_info->skb; + /* in the packet split case this is header only */ + prefetch(skb->data - NET_IP_ALIGN); + if (++i == rx_ring->count) i = 0; next_rxd = E1000_RX_DESC_PS(*rx_ring, i); + prefetch(next_rxd); + next_buffer = &rx_ring->buffer_info[i]; - next_skb = next_buffer->skb; cleaned = TRUE; cleaned_count++; @@ -3836,24 +4410,58 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, /* Good Receive */ skb_put(skb, length); + { + /* this looks ugly, but it seems compiler issues make it + more efficient than reusing j */ + int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]); + + /* page alloc/put takes too long and effects small packet + * throughput, so unsplit small packets and save the alloc/put*/ + if (l1 && (l1 <= copybreak) && ((length + l1) <= adapter->rx_ps_bsize0)) { + u8 *vaddr; + /* there is no documentation about how to call + * kmap_atomic, so we can't hold the mapping + * very long */ + pci_dma_sync_single_for_cpu(pdev, + ps_page_dma->ps_page_dma[0], + PAGE_SIZE, + PCI_DMA_FROMDEVICE); + vaddr = kmap_atomic(ps_page->ps_page[0], + KM_SKB_DATA_SOFTIRQ); + memcpy(skb->tail, vaddr, l1); + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); + pci_dma_sync_single_for_device(pdev, + ps_page_dma->ps_page_dma[0], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + /* remove the CRC */ + l1 -= 4; + skb_put(skb, l1); + goto copydone; + } /* if */ + } + for (j = 0; j < adapter->rx_ps_pages; j++) { - if (!(length = le16_to_cpu(rx_desc->wb.upper.length[j]))) + if (!(length= le16_to_cpu(rx_desc->wb.upper.length[j]))) break; - pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j], PAGE_SIZE, PCI_DMA_FROMDEVICE); ps_page_dma->ps_page_dma[j] = 0; - skb_shinfo(skb)->frags[j].page = - ps_page->ps_page[j]; + skb_fill_page_desc(skb, j, ps_page->ps_page[j], 0, + length); ps_page->ps_page[j] = NULL; - skb_shinfo(skb)->frags[j].page_offset = 0; - skb_shinfo(skb)->frags[j].size = length; - skb_shinfo(skb)->nr_frags++; skb->len += length; skb->data_len += length; skb->truesize += length; } + /* strip the ethernet crc, problem is we're using pages now so + * this whole operation can get a little cpu intensive */ + pskb_trim(skb, skb->len - 4); + +copydone: + total_rx_bytes += skb->len; + total_rx_packets++; + e1000_rx_checksum(adapter, staterr, le16_to_cpu(rx_desc->wb.lower.hi_dword.csum_ip.csum), skb); skb->protocol = 
eth_type_trans(skb, netdev); @@ -3879,10 +4487,6 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, } #endif /* CONFIG_E1000_NAPI */ netdev->last_rx = jiffies; -#ifdef CONFIG_E1000_MQ - rx_ring->rx_stats.packets++; - rx_ring->rx_stats.bytes += length; -#endif next_desc: rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF); @@ -3894,6 +4498,7 @@ next_desc: cleaned_count = 0; } + /* use prefetched values */ rx_desc = next_rxd; buffer_info = next_buffer; @@ -3905,6 +4510,8 @@ next_desc: if (cleaned_count) adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); + adapter->total_rx_packets += total_rx_packets; + adapter->total_rx_bytes += total_rx_bytes; return cleaned; } @@ -3930,14 +4537,13 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, buffer_info = &rx_ring->buffer_info[i]; while (cleaned_count--) { - if (!(skb = buffer_info->skb)) - skb = dev_alloc_skb(bufsz); - else { + skb = buffer_info->skb; + if (skb) { skb_trim(skb, 0); goto map_skb; } - + skb = netdev_alloc_skb(netdev, bufsz); if (unlikely(!skb)) { /* Better luck next round */ adapter->alloc_rx_buff_failed++; @@ -3950,7 +4556,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " "at %p\n", bufsz, skb->data); /* Try again, without freeing the previous */ - skb = dev_alloc_skb(bufsz); + skb = netdev_alloc_skb(netdev, bufsz); /* Failed allocation, critical failure */ if (!skb) { dev_kfree_skb(oldskb); @@ -3962,10 +4568,10 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, dev_kfree_skb(skb); dev_kfree_skb(oldskb); break; /* while !buffer_info->skb */ - } else { - /* Use new allocation */ - dev_kfree_skb(oldskb); } + + /* Use new allocation */ + dev_kfree_skb(oldskb); } /* Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after @@ -3973,8 +4579,6 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, */ skb_reserve(skb, NET_IP_ALIGN); - skb->dev = netdev; - buffer_info->skb = skb; buffer_info->length = adapter->rx_buffer_len; map_skb: @@ -4074,7 +4678,8 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, rx_desc->read.buffer_addr[j+1] = ~0; } - skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); + skb = netdev_alloc_skb(netdev, + adapter->rx_ps_bsize0 + NET_IP_ALIGN); if (unlikely(!skb)) { adapter->alloc_rx_buff_failed++; @@ -4087,8 +4692,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, */ skb_reserve(skb, NET_IP_ALIGN); - skb->dev = netdev; - buffer_info->skb = skb; buffer_info->length = adapter->rx_ps_bsize0; buffer_info->dma = pci_map_single(pdev, skb->data, @@ -4243,7 +4846,7 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) spin_unlock_irqrestore(&adapter->stats_lock, flags); return -EIO; } - if (adapter->hw.phy_type == e1000_phy_m88) { + if (adapter->hw.media_type == e1000_media_type_copper) { switch (data->reg_num) { case PHY_CTRL: if (mii_reg & MII_CR_POWER_DOWN) @@ -4259,8 +4862,8 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) else spddplx = SPEED_10; spddplx += (mii_reg & 0x100) - ? FULL_DUPLEX : - HALF_DUPLEX; + ? 
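+ /* BMCR bit 8 (0x100) selects full duplex */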
DUPLEX_FULL :
+						   DUPLEX_HALF;
 				retval = e1000_set_spd_dplx(adapter, spddplx);
 				if (retval) {
@@ -4270,10 +4873,9 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 					return retval;
 				}
 			}
-			if (netif_running(adapter->netdev)) {
-				e1000_down(adapter);
-				e1000_up(adapter);
-			} else
+			if (netif_running(adapter->netdev))
+				e1000_reinit_locked(adapter);
+			else
 				e1000_reset(adapter);
 			break;
 		case M88E1000_PHY_SPEC_CTRL:
@@ -4290,10 +4892,9 @@ e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 		case PHY_CTRL:
 			if (mii_reg & MII_CR_POWER_DOWN)
 				break;
-			if (netif_running(adapter->netdev)) {
-				e1000_down(adapter);
-				e1000_up(adapter);
-			} else
+			if (netif_running(adapter->netdev))
+				e1000_reinit_locked(adapter);
+			else
 				e1000_reset(adapter);
 			break;
 		}
@@ -4340,10 +4941,19 @@ e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
 	pci_write_config_word(adapter->pdev, reg, *value);
 }
 
-uint32_t
-e1000_io_read(struct e1000_hw *hw, unsigned long port)
+int32_t
+e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
 {
-	return inl(port);
+	struct e1000_adapter *adapter = hw->back;
+	uint16_t cap_offset;
+
+	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+	if (!cap_offset)
+		return -E1000_ERR_CONFIG;
+
+	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+	return E1000_SUCCESS;
 }
 
 void
@@ -4367,25 +4977,31 @@ e1000_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
 		ctrl |= E1000_CTRL_VME;
 		E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
 
-		/* enable VLAN receive filtering */
-		rctl = E1000_READ_REG(&adapter->hw, RCTL);
-		rctl |= E1000_RCTL_VFE;
-		rctl &= ~E1000_RCTL_CFIEN;
-		E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
-		e1000_update_mng_vlan(adapter);
+		if (adapter->hw.mac_type != e1000_ich8lan) {
+			/* enable VLAN receive filtering */
+			rctl = E1000_READ_REG(&adapter->hw, RCTL);
+			rctl |= E1000_RCTL_VFE;
+			rctl &= ~E1000_RCTL_CFIEN;
+			E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+			e1000_update_mng_vlan(adapter);
+		}
 	} else {
 		/* disable VLAN tag insert/strip */
 		ctrl = E1000_READ_REG(&adapter->hw, CTRL);
 		ctrl &= ~E1000_CTRL_VME;
 		E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
 
-		/* disable VLAN filtering */
-		rctl = E1000_READ_REG(&adapter->hw, RCTL);
-		rctl &= ~E1000_RCTL_VFE;
-		E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
-		if (adapter->mng_vlan_id != (uint16_t)E1000_MNG_VLAN_NONE) {
-			e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
-			adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+		if (adapter->hw.mac_type != e1000_ich8lan) {
+			/* disable VLAN filtering */
+			rctl = E1000_READ_REG(&adapter->hw, RCTL);
+			rctl &= ~E1000_RCTL_VFE;
+			E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
+			if (adapter->mng_vlan_id !=
+			    (uint16_t)E1000_MNG_VLAN_NONE) {
+				e1000_vlan_rx_kill_vid(netdev,
+						       adapter->mng_vlan_id);
+				adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+			}
 		}
 	}
 
@@ -4490,8 +5106,8 @@ e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx)
 }
 
 #ifdef CONFIG_PM
-/* these functions save and restore 16 or 64 dwords (64-256 bytes) of config
- * space versus the 64 bytes that pci_[save|restore]_state handle
+/* Save/restore 16 or 64 dwords of PCI config space depending on which
+ * bus we're on (PCI(X) vs. PCI-E)
  */
 #define PCIE_CONFIG_SPACE_LEN 256
 #define PCI_CONFIG_SPACE_LEN 64
@@ -4501,6 +5117,7 @@ e1000_pci_save_state(struct e1000_adapter *adapter)
 	struct pci_dev *dev = adapter->pdev;
 	int size;
 	int i;
+
 	if (adapter->hw.mac_type >= e1000_82571)
 		size = PCIE_CONFIG_SPACE_LEN;
 	else
@@ -4524,8 +5141,10 @@ e1000_pci_restore_state(struct e1000_adapter *adapter)
 	struct pci_dev *dev = adapter->pdev;
 	int size;
 	int i;
+
 	if (adapter->config_space == NULL)
 		return;
+
 	if (adapter->hw.mac_type >= e1000_82571)
 		size = PCIE_CONFIG_SPACE_LEN;
 	else
@@ -4543,18 +5162,22 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	uint32_t ctrl, ctrl_ext, rctl, manc, status;
+	uint32_t ctrl, ctrl_ext, rctl, status;
 	uint32_t wufc = adapter->wol;
+#ifdef CONFIG_PM
 	int retval = 0;
+#endif
 
 	netif_device_detach(netdev);
 
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
+		WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags));
 		e1000_down(adapter);
+	}
 
 #ifdef CONFIG_PM
-	/* implement our own version of pci_save_state(pdev) because pci
-	 * express adapters have larger 256 byte config spaces */
+	/* Implement our own version of pci_save_state(pdev) because pci-
+	 * express adapters have 256-byte config spaces. */
 	retval = e1000_pci_save_state(adapter);
 	if (retval)
 		return retval;
@@ -4569,7 +5192,7 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state)
 		e1000_set_multi(netdev);
 
 		/* turn on all-multi mode if wake on multicast is enabled */
-		if (adapter->wol & E1000_WUFC_MC) {
+		if (wufc & E1000_WUFC_MC) {
 			rctl = E1000_READ_REG(&adapter->hw, RCTL);
 			rctl |= E1000_RCTL_MPE;
 			E1000_WRITE_REG(&adapter->hw, RCTL, rctl);
@@ -4599,47 +5222,36 @@ e1000_suspend(struct pci_dev *pdev, pm_message_t state)
 		E1000_WRITE_REG(&adapter->hw, WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, WUFC, wufc);
-		retval = pci_enable_wake(pdev, PCI_D3hot, 1);
-		if (retval)
-			DPRINTK(PROBE, ERR, "Error enabling D3 wake\n");
-		retval = pci_enable_wake(pdev, PCI_D3cold, 1);
-		if (retval)
-			DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n");
+		pci_enable_wake(pdev, PCI_D3hot, 1);
+		pci_enable_wake(pdev, PCI_D3cold, 1);
 	} else {
 		E1000_WRITE_REG(&adapter->hw, WUC, 0);
 		E1000_WRITE_REG(&adapter->hw, WUFC, 0);
-		retval = pci_enable_wake(pdev, PCI_D3hot, 0);
-		if (retval)
-			DPRINTK(PROBE, ERR, "Error enabling D3 wake\n");
-		retval = pci_enable_wake(pdev, PCI_D3cold, 0); /* 4 == D3 cold */
-		if (retval)
-			DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n");
+		pci_enable_wake(pdev, PCI_D3hot, 0);
+		pci_enable_wake(pdev, PCI_D3cold, 0);
 	}
 
-	if (adapter->hw.mac_type >= e1000_82540 &&
-	    adapter->hw.media_type == e1000_media_type_copper) {
-		manc = E1000_READ_REG(&adapter->hw, MANC);
-		if (manc & E1000_MANC_SMBUS_EN) {
-			manc |= E1000_MANC_ARP_EN;
-			E1000_WRITE_REG(&adapter->hw, MANC, manc);
-			retval = pci_enable_wake(pdev, PCI_D3hot, 1);
-			if (retval)
-				DPRINTK(PROBE, ERR, "Error enabling D3 wake\n");
-			retval = pci_enable_wake(pdev, PCI_D3cold, 1);
-			if (retval)
-				DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n");
-		}
+	e1000_release_manageability(adapter);
+
+	/* make sure adapter isn't asleep if manageability is enabled */
+	if (adapter->en_mng_pt) {
+		pci_enable_wake(pdev, PCI_D3hot, 1);
+		pci_enable_wake(pdev, PCI_D3cold, 1);
 	}
 
+	if (adapter->hw.phy_type == e1000_phy_igp_3)
+		e1000_phy_powerdown_workaround(&adapter->hw);
+
+	if (netif_running(netdev))
+		e1000_free_irq(adapter);
+
 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
 	 * would have already happened in close and is redundant. */
 	e1000_release_hw_control(adapter);
 
 	pci_disable_device(pdev);
 
-	retval = pci_set_power_state(pdev, pci_choose_state(pdev, state));
-	if (retval)
-		DPRINTK(PROBE, ERR, "Error in setting power state\n");
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
 
 	return 0;
 }
 
@@ -4650,38 +5262,33 @@ e1000_resume(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	int retval;
-	uint32_t manc, ret_val;
+	int err;
 
-	retval = pci_set_power_state(pdev, PCI_D0);
-	if (retval)
-		DPRINTK(PROBE, ERR, "Error in setting power state\n");
+	pci_set_power_state(pdev, PCI_D0);
 	e1000_pci_restore_state(adapter);
-	ret_val = pci_enable_device(pdev);
+	if ((err = pci_enable_device(pdev))) {
+		printk(KERN_ERR "e1000: Cannot enable PCI device from suspend\n");
+		return err;
+	}
 	pci_set_master(pdev);
 
-	retval = pci_enable_wake(pdev, PCI_D3hot, 0);
-	if (retval)
-		DPRINTK(PROBE, ERR, "Error enabling D3 wake\n");
-	retval = pci_enable_wake(pdev, PCI_D3cold, 0);
-	if (retval)
-		DPRINTK(PROBE, ERR, "Error enabling D3 cold wake\n");
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	if (netif_running(netdev) && (err = e1000_request_irq(adapter)))
+		return err;
+
+	e1000_power_up_phy(adapter);
 	e1000_reset(adapter);
 	E1000_WRITE_REG(&adapter->hw, WUS, ~0);
 
+	e1000_init_manageability(adapter);
+
 	if (netif_running(netdev))
 		e1000_up(adapter);
 
 	netif_device_attach(netdev);
 
-	if (adapter->hw.mac_type >= e1000_82540 &&
-	    adapter->hw.media_type == e1000_media_type_copper) {
-		manc = E1000_READ_REG(&adapter->hw, MANC);
-		manc &= ~(E1000_MANC_ARP_EN);
-		E1000_WRITE_REG(&adapter->hw, MANC, manc);
-	}
-
 	/* If the controller is 82573 and f/w is AMT, do not set
 	 * DRV_LOAD until the interface is up.  For all other cases,
 	 * let the f/w know that the h/w is now under the control
@@ -4693,6 +5300,12 @@ e1000_resume(struct pci_dev *pdev)
 	return 0;
 }
 #endif
+
+static void e1000_shutdown(struct pci_dev *pdev)
+{
+	e1000_suspend(pdev, PMSG_SUSPEND);
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /*
  * Polling 'interrupt' - used by things like netconsole to send skbs
@@ -4703,8 +5316,9 @@ static void
 e1000_netpoll(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
+
 	disable_irq(adapter->pdev->irq);
-	e1000_intr(adapter->pdev->irq, netdev, NULL);
+	e1000_intr(adapter->pdev->irq, netdev);
 	e1000_clean_tx_irq(adapter, adapter->tx_ring);
 #ifndef CONFIG_E1000_NAPI
 	adapter->clean_rx(adapter, adapter->rx_ring);
@@ -4713,4 +5327,88 @@
 }
 #endif
 
+/**
+ * e1000_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current PCI connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev->priv;
+
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		e1000_down(adapter);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e1000_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.  Implementation
+ * resembles the first half of the e1000_resume routine.
+ */
+static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev->priv;
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "e1000: Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	e1000_reset(adapter);
+	E1000_WRITE_REG(&adapter->hw, WUS, ~0);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * e1000_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * it's OK to resume normal operation. Implementation resembles the
+ * second half of the e1000_resume routine.
+ */
+static void e1000_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev->priv;
+
+	e1000_init_manageability(adapter);
+
+	if (netif_running(netdev)) {
+		if (e1000_up(adapter)) {
+			printk(KERN_ERR "e1000: can't bring device back up after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+
+	/* If the controller is 82573 and f/w is AMT, do not set
+	 * DRV_LOAD until the interface is up.  For all other cases,
+	 * let the f/w know that the h/w is now under the control
+	 * of the driver. */
+	if (adapter->hw.mac_type != e1000_82573 ||
+	    !e1000_check_mng_mode(&adapter->hw))
+		e1000_get_hw_control(adapter);
+
+}
+
 /* e1000_main.c */
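The three e1000_io_* callbacks above only take effect once they are registered with the PCI core through a struct pci_error_handlers hooked into the driver's struct pci_driver. That registration lives earlier in e1000_main.c and is not visible in the hunks shown above; the following is a minimal sketch of the standard 2.6-era wiring, assuming the driver's existing e1000_driver_name, e1000_pci_tbl, e1000_probe, e1000_remove, e1000_suspend and e1000_resume symbols rather than quoting this commit verbatim.

/* Sketch only -- illustrates how the error-recovery callbacks added in
 * this patch are typically hooked up; the actual registration block is
 * in a part of e1000_main.c not shown in this diff. */
static struct pci_error_handlers e1000_err_handler = {
	.error_detected = e1000_io_error_detected,	/* stop traffic, ask for a slot reset */
	.slot_reset     = e1000_io_slot_reset,		/* re-enable the device, reset the MAC */
	.resume         = e1000_io_resume,		/* bring the interface back up */
};

static struct pci_driver e1000_driver = {
	.name        = e1000_driver_name,
	.id_table    = e1000_pci_tbl,
	.probe       = e1000_probe,
	.remove      = __devexit_p(e1000_remove),
#ifdef CONFIG_PM
	.suspend     = e1000_suspend,
	.resume      = e1000_resume,
#endif
	.shutdown    = e1000_shutdown,		/* added by this patch */
	.err_handler = &e1000_err_handler,	/* added by this patch */
};

Recovery then proceeds in that order: the PCI core calls error_detected() on every driver on the failing bus, resets the slot if any of them returned PCI_ERS_RESULT_NEED_RESET, calls slot_reset(), and once a driver reports PCI_ERS_RESULT_RECOVERED it finally calls resume(). That sequencing is why e1000_io_slot_reset and e1000_io_resume mirror the first and second halves of e1000_resume.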