linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / drivers / infiniband / ulp / ipoib / ipoib_main.c
index 6f60abb..c3b5f79 100644 (file)
@@ -1,5 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +36,6 @@
 
 #include "ipoib.h"
 
-#include <linux/version.h>
 #include <linux/module.h>
 
 #include <linux/init.h>
@@ -46,6 +47,8 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 
+#include <net/dst.h>
+
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -57,6 +60,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 #endif
 
+struct ipoib_path_iter {
+       struct net_device *dev;         /* device whose path tree is scanned */
+       struct ipoib_path  path;        /* by-value copy of the current path */
+};
+
 static const u8 ipv4_bcast_addr[] = {
        0x00, 0xff, 0xff, 0xff,
        0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
@@ -88,14 +96,16 @@ int ipoib_open(struct net_device *dev)
        if (ipoib_ib_dev_open(dev))
                return -EINVAL;
 
-       if (ipoib_ib_dev_up(dev))
+       if (ipoib_ib_dev_up(dev)) {
+               ipoib_ib_dev_stop(dev);
                return -EINVAL;
+       }
 
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
 
                /* Bring up any child interfaces too */
-               down(&priv->vlan_mutex);
+               mutex_lock(&priv->vlan_mutex);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;
 
@@ -105,7 +115,7 @@ int ipoib_open(struct net_device *dev)
 
                        dev_change_flags(cpriv->dev, flags | IFF_UP);
                }
-               up(&priv->vlan_mutex);
+               mutex_unlock(&priv->vlan_mutex);
        }
 
        netif_start_queue(dev);
@@ -130,7 +140,7 @@ static int ipoib_stop(struct net_device *dev)
                struct ipoib_dev_priv *cpriv;
 
                /* Bring down any child interfaces too */
-               down(&priv->vlan_mutex);
+               mutex_lock(&priv->vlan_mutex);
                list_for_each_entry(cpriv, &priv->child_intfs, list) {
                        int flags;
 
@@ -140,7 +150,7 @@ static int ipoib_stop(struct net_device *dev)
 
                        dev_change_flags(cpriv->dev, flags & ~IFF_UP);
                }
-               up(&priv->vlan_mutex);
+               mutex_unlock(&priv->vlan_mutex);
        }
 
        return 0;
@@ -249,6 +259,86 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
        kfree(path);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
+/*
+ * Allocate an iterator over the paths of @dev and load the first path
+ * into it via ipoib_path_iter_next().
+ *
+ * Returns the iterator, or NULL if the allocation fails or
+ * ipoib_path_iter_next() reports that there is no path to show.
+ */
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
+{
+       struct ipoib_path_iter *iter;
+
+       iter = kmalloc(sizeof *iter, GFP_KERNEL);
+       if (!iter)
+               return NULL;
+
+       iter->dev = dev;
+       /* Zero GID: the first _next() call accepts any non-zero dgid. */
+       memset(iter->path.pathrec.dgid.raw, 0, sizeof (union ib_gid));
+
+       if (ipoib_path_iter_next(iter)) {
+               kfree(iter);
+               return NULL;
+       }
+
+       return iter;
+}
+
+/*
+ * Advance @iter to the next path of iter->dev.
+ *
+ * With priv->lock held, scan priv->path_tree from rb_first() and copy
+ * into iter->path the first entry whose dgid compares greater (memcmp)
+ * than the dgid currently stored in the iterator.  Each call rescans
+ * from the start of the tree.
+ *
+ * Returns 0 if a path was copied, 1 if no further path exists.
+ */
+int ipoib_path_iter_next(struct ipoib_path_iter *iter)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+       struct rb_node *n;
+       struct ipoib_path *path;
+       int ret = 1;
+
+       spin_lock_irq(&priv->lock);
+
+       n = rb_first(&priv->path_tree);
+
+       while (n) {
+               path = rb_entry(n, struct ipoib_path, rb_node);
+
+               /* memcmp() < 0: this entry lies beyond the current one. */
+               if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
+                          sizeof (union ib_gid)) < 0) {
+                       iter->path = *path;
+                       ret = 0;
+                       break;
+               }
+
+               n = rb_next(n);
+       }
+
+       spin_unlock_irq(&priv->lock);
+
+       return ret;
+}
+
+/*
+ * Copy the path currently held by @iter into @path.
+ */
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+                         struct ipoib_path *path)
+{
+       *path = iter->path;
+}
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
+
 void ipoib_flush_paths(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -334,9 +402,9 @@ static void path_rec_completion(int status,
                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
                }
-       } else
-               path->query = NULL;
+       }
 
+       path->query = NULL;
        complete(&path->done);
 
        spin_unlock_irqrestore(&priv->lock, flags);
@@ -355,19 +423,15 @@ static struct ipoib_path *path_rec_create(struct net_device *dev,
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_path *path;
 
-       path = kmalloc(sizeof *path, GFP_ATOMIC);
+       path = kzalloc(sizeof *path, GFP_ATOMIC);
        if (!path)
                return NULL;
 
-       path->dev          = dev;
-       path->pathrec.dlid = 0;
-       path->ah           = NULL;
+       path->dev = dev;
 
        skb_queue_head_init(&path->queue);
 
        INIT_LIST_HEAD(&path->neigh_list);
-       path->query = NULL;
-       init_completion(&path->done);
 
        memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
        path->pathrec.sgid      = priv->local_gid;
@@ -385,6 +449,8 @@ static int path_rec_start(struct net_device *dev,
        ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
                  IPOIB_GID_ARG(path->pathrec.dgid));
 
+       init_completion(&path->done);
+
        path->query_id =
                ib_sa_path_rec_get(priv->ca, priv->port,
                                   &path->pathrec,
@@ -439,7 +505,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 
        list_add_tail(&neigh->list, &path->neigh_list);
 
-       if (path->pathrec.dlid) {
+       if (path->ah) {
                kref_get(&path->ah->ref);
                neigh->ah = path->ah;
 
@@ -473,7 +539,7 @@ err:
        spin_unlock(&priv->lock);
 }
 
-static void path_lookup(struct sk_buff *skb, struct net_device *dev)
+static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
 
@@ -525,7 +591,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                return;
        }
 
-       if (path->pathrec.dlid) {
+       if (path->ah) {
                ipoib_dbg(priv, "Send unicast ARP to %04x\n",
                          be16_to_cpu(path->pathrec.dlid));
 
@@ -550,11 +616,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct ipoib_neigh *neigh;
        unsigned long flags;
 
-       local_irq_save(flags);
-       if (!spin_trylock(&priv->tx_lock)) {
-               local_irq_restore(flags);
+       if (!spin_trylock_irqsave(&priv->tx_lock, flags))
                return NETDEV_TX_LOCKED;
-       }
 
        /*
         * Check if our queue is stopped.  Since we have the LLTX bit
@@ -568,7 +631,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (skb->dst && skb->dst->neighbour) {
                if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
-                       path_lookup(skb, dev);
+                       ipoib_path_lookup(skb, dev);
                        goto out;
                }
 
@@ -600,14 +663,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
                        ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
                } else {
-                       /* unicast GID -- should be ARP reply */
+                       /* unicast GID -- should be ARP or RARP reply */
 
-                       if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
+                       if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
+                           (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
                                ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
                                           IPOIB_GID_FMT "\n",
                                           skb->dst ? "neigh" : "dst",
-                                          be16_to_cpup((u16 *) skb->data),
-                                          be32_to_cpup((u32 *) phdr->hwaddr),
+                                          be16_to_cpup((__be16 *) skb->data),
+                                          be32_to_cpup((__be32 *) phdr->hwaddr),
                                           IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
                                dev_kfree_skb_any(skb);
                                ++priv->stats.tx_dropped;
@@ -635,8 +699,11 @@ static void ipoib_timeout(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       ipoib_warn(priv, "transmit timeout: latency %ld\n",
-                  jiffies - dev->trans_start);
+       ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+                  jiffies_to_msecs(jiffies - dev->trans_start));
+       ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+                  netif_queue_stopped(dev),
+                  priv->tx_head, priv->tx_tail);
        /* XXX reset QP, etc. */
 }
 
@@ -670,7 +737,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       schedule_work(&priv->restart_task);
+       queue_work(ipoib_workqueue, &priv->restart_task);
 }
 
 static void ipoib_neigh_destructor(struct neighbour *n)
@@ -727,25 +794,21 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 
        /* Allocate RX/TX "rings" to hold queued skbs */
 
-       priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+       priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                       ca->name, IPOIB_RX_RING_SIZE);
                goto out;
        }
-       memset(priv->rx_ring, 0,
-              IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
 
-       priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+       priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
                                GFP_KERNEL);
        if (!priv->tx_ring) {
                printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
                       ca->name, IPOIB_TX_RING_SIZE);
                goto out_rx_ring_cleanup;
        }
-       memset(priv->tx_ring, 0,
-              IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
 
        /* priv->tx_head & tx_tail are already 0 */
 
@@ -768,7 +831,7 @@ void ipoib_dev_cleanup(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
 
-       ipoib_delete_debug_file(dev);
+       ipoib_delete_debug_files(dev);
 
        /* Delete any child interfaces first */
        list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
@@ -779,15 +842,11 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
        ipoib_ib_dev_cleanup(dev);
 
-       if (priv->rx_ring) {
-               kfree(priv->rx_ring);
-               priv->rx_ring = NULL;
-       }
+       kfree(priv->rx_ring);
+       kfree(priv->tx_ring);
 
-       if (priv->tx_ring) {
-               kfree(priv->tx_ring);
-               priv->tx_ring = NULL;
-       }
+       priv->rx_ring = NULL;
+       priv->tx_ring = NULL;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -806,10 +865,6 @@ static void ipoib_setup(struct net_device *dev)
 
        dev->watchdog_timeo      = HZ;
 
-       dev->rebuild_header      = NULL;
-       dev->set_mac_address     = NULL;
-       dev->header_cache_update = NULL;
-
        dev->flags              |= IFF_BROADCAST | IFF_MULTICAST;
 
        /*
@@ -837,8 +892,8 @@ static void ipoib_setup(struct net_device *dev)
        spin_lock_init(&priv->lock);
        spin_lock_init(&priv->tx_lock);
 
-       init_MUTEX(&priv->mcast_mutex);
-       init_MUTEX(&priv->vlan_mutex);
+       mutex_init(&priv->mcast_mutex);
+       mutex_init(&priv->vlan_mutex);
 
        INIT_LIST_HEAD(&priv->path_list);
        INIT_LIST_HEAD(&priv->child_intfs);
@@ -885,6 +940,12 @@ static ssize_t create_child(struct class_device *cdev,
        if (pkey < 0 || pkey > 0xffff)
                return -EINVAL;
 
+       /*
+        * Set the full membership bit, so that we join the right
+        * broadcast group, etc.
+        */
+       pkey |= 0x8000;
+
        ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
                             pkey);
 
@@ -937,6 +998,12 @@ static struct net_device *ipoib_add_port(const char *format,
                goto alloc_mem_failed;
        }
 
+       /*
+        * Set the full membership bit, so that we join the right
+        * broadcast group, etc.
+        */
+       priv->pkey |= 0x8000;
+
        priv->dev->broadcast[8] = priv->pkey >> 8;
        priv->dev->broadcast[9] = priv->pkey & 0xff;
 
@@ -973,8 +1040,7 @@ static struct net_device *ipoib_add_port(const char *format,
                goto register_failed;
        }
 
-       if (ipoib_create_debug_file(priv->dev))
-               goto debug_failed;
+       ipoib_create_debug_files(priv->dev);
 
        if (ipoib_add_pkey_attr(priv->dev))
                goto sysfs_failed;
@@ -988,13 +1054,12 @@ static struct net_device *ipoib_add_port(const char *format,
        return priv->dev;
 
 sysfs_failed:
-       ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+       ipoib_delete_debug_files(priv->dev);
        unregister_netdev(priv->dev);
 
 register_failed:
        ib_unregister_event_handler(&priv->event_handler);
+       flush_scheduled_work();
 
 event_failed:
        ipoib_dev_cleanup(priv->dev);
@@ -1047,11 +1112,14 @@ static void ipoib_remove_one(struct ib_device *device)
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
                ib_unregister_event_handler(&priv->event_handler);
+               flush_scheduled_work();
 
                unregister_netdev(priv->dev);
                ipoib_dev_cleanup(priv->dev);
                free_netdev(priv->dev);
        }
+
+       kfree(dev_list);
 }
 
 static int __init ipoib_init_module(void)