/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
#include "ipoib.h"
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/in.h>
+#include <net/dst.h>
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
-int debug_level;
+int ipoib_debug_level;
-module_param(debug_level, int, 0644);
+module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif
+struct ipoib_path_iter {
+ struct net_device *dev;
+ struct ipoib_path path;
+};
+
static const u8 ipv4_bcast_addr[] = {
0x00, 0xff, 0xff, 0xff,
0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
if (ipoib_ib_dev_open(dev))
return -EINVAL;
- if (ipoib_ib_dev_up(dev))
+ if (ipoib_ib_dev_up(dev)) {
+ ipoib_ib_dev_stop(dev);
return -EINVAL;
+ }
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
/* Bring up any child interfaces too */
- down(&priv->vlan_mutex);
+ mutex_lock(&priv->vlan_mutex);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
dev_change_flags(cpriv->dev, flags | IFF_UP);
}
- up(&priv->vlan_mutex);
+ mutex_unlock(&priv->vlan_mutex);
}
netif_start_queue(dev);
struct ipoib_dev_priv *cpriv;
/* Bring down any child interfaces too */
- down(&priv->vlan_mutex);
+ mutex_lock(&priv->vlan_mutex);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
dev_change_flags(cpriv->dev, flags & ~IFF_UP);
}
- up(&priv->vlan_mutex);
+ mutex_unlock(&priv->vlan_mutex);
}
return 0;
return 0;
}
-static void __path_free(struct net_device *dev, struct ipoib_path *path)
+static void path_free(struct net_device *dev, struct ipoib_path *path)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh, *tn;
struct sk_buff *skb;
+ unsigned long flags;
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
+ spin_lock_irqsave(&priv->lock, flags);
+
list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+ /*
+ * It's safe to call ipoib_put_ah() inside priv->lock
+ * here, because we know that path->ah will always
+ * hold one more reference, so ipoib_put_ah() will
+ * never do more than decrement the ref count.
+ */
if (neigh->ah)
ipoib_put_ah(neigh->ah);
*to_ipoib_neigh(neigh->neighbour) = NULL;
kfree(neigh);
}
+ spin_unlock_irqrestore(&priv->lock, flags);
+
if (path->ah)
ipoib_put_ah(path->ah);
- rb_erase(&path->rb_node, &priv->path_tree);
- list_del(&path->list);
kfree(path);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
+{
+ struct ipoib_path_iter *iter;
+
+ iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->dev = dev;
+ memset(iter->path.pathrec.dgid.raw, 0, 16);
+
+ if (ipoib_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+int ipoib_path_iter_next(struct ipoib_path_iter *iter)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct rb_node *n;
+ struct ipoib_path *path;
+ int ret = 1;
+
+ spin_lock_irq(&priv->lock);
+
+ n = rb_first(&priv->path_tree);
+
+ while (n) {
+ path = rb_entry(n, struct ipoib_path, rb_node);
+
+ if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
+ sizeof (union ib_gid)) < 0) {
+ iter->path = *path;
+ ret = 0;
+ break;
+ }
+
+ n = rb_next(n);
+ }
+
+ spin_unlock_irq(&priv->lock);
+
+ return ret;
+}
+
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+ struct ipoib_path *path)
+{
+ *path = iter->path;
+}
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
spin_lock_irqsave(&priv->lock, flags);
+
list_splice(&priv->path_list, &remove_list);
INIT_LIST_HEAD(&priv->path_list);
+
+ list_for_each_entry(path, &remove_list, list)
+ rb_erase(&path->rb_node, &priv->path_tree);
+
spin_unlock_irqrestore(&priv->lock, flags);
list_for_each_entry_safe(path, tp, &remove_list, list) {
if (path->query)
ib_sa_cancel_query(path->query_id, path->query);
wait_for_completion(&path->done);
- __path_free(dev, path);
+ path_free(dev, path);
}
}
.sl = pathrec->sl,
.port_num = priv->port
};
+ int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);
- if (ib_sa_rate_enum_to_int(pathrec->rate) > 0)
- av.static_rate = (2 * priv->local_rate -
- ib_sa_rate_enum_to_int(pathrec->rate) - 1) /
- (priv->local_rate ? priv->local_rate : 1);
+ if (path_rate > 0 && priv->local_rate > path_rate)
+ av.static_rate = (priv->local_rate - 1) / path_rate;
ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
av.static_rate, priv->local_rate,
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
- } else
- path->query = NULL;
+ }
+ path->query = NULL;
complete(&path->done);
spin_unlock_irqrestore(&priv->lock, flags);
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
- path = kmalloc(sizeof *path, GFP_ATOMIC);
+ path = kzalloc(sizeof *path, GFP_ATOMIC);
if (!path)
return NULL;
path->dev = dev;
- path->pathrec.dlid = 0;
skb_queue_head_init(&path->queue);
INIT_LIST_HEAD(&path->neigh_list);
- path->query = NULL;
- init_completion(&path->done);
memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
path->pathrec.pkey = cpu_to_be16(priv->pkey);
path->pathrec.numb_path = 1;
- __path_add(dev, path);
-
return path;
}
ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(path->pathrec.dgid));
+ init_completion(&path->done);
+
path->query_id =
ib_sa_path_rec_get(priv->ca, priv->port,
&path->pathrec,
(union ib_gid *) (skb->dst->neighbour->ha + 4));
if (!path)
goto err;
+
+ __path_add(dev, path);
}
list_add_tail(&neigh->list, &path->neigh_list);
- if (path->pathrec.dlid) {
+ if (path->ah) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
err:
*to_ipoib_neigh(skb->dst->neighbour) = NULL;
list_del(&neigh->list);
- kfree(neigh);
neigh->neighbour->ops->destructor = NULL;
+ kfree(neigh);
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
spin_unlock(&priv->lock);
}
-static void path_lookup(struct sk_buff *skb, struct net_device *dev)
+static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
skb_push(skb, sizeof *phdr);
__skb_queue_tail(&path->queue, skb);
- if (path_rec_start(dev, path))
- __path_free(dev, path);
+ if (path_rec_start(dev, path)) {
+ spin_unlock(&priv->lock);
+ path_free(dev, path);
+ return;
+ } else
+ __path_add(dev, path);
} else {
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
return;
}
- if (path->pathrec.dlid) {
+ if (path->ah) {
ipoib_dbg(priv, "Send unicast ARP to %04x\n",
be16_to_cpu(path->pathrec.dlid));
struct ipoib_neigh *neigh;
unsigned long flags;
- local_irq_save(flags);
- if (!spin_trylock(&priv->tx_lock)) {
- local_irq_restore(flags);
+ if (!spin_trylock_irqsave(&priv->tx_lock, flags))
return NETDEV_TX_LOCKED;
- }
/*
* Check if our queue is stopped. Since we have the LLTX bit
if (skb->dst && skb->dst->neighbour) {
if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
- path_lookup(skb, dev);
+ ipoib_path_lookup(skb, dev);
goto out;
}
ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
} else {
- /* unicast GID -- should be ARP reply */
+ /* unicast GID -- should be ARP or RARP reply */
- if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
+ if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
+ (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
IPOIB_GID_FMT "\n",
skb->dst ? "neigh" : "dst",
- be16_to_cpup((u16 *) skb->data),
- be32_to_cpup((u32 *) phdr->hwaddr),
+ be16_to_cpup((__be16 *) skb->data),
+ be32_to_cpup((__be32 *) phdr->hwaddr),
IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
dev_kfree_skb_any(skb);
++priv->stats.tx_dropped;
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- ipoib_warn(priv, "transmit timeout: latency %ld\n",
- jiffies - dev->trans_start);
+ ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+ jiffies_to_msecs(jiffies - dev->trans_start));
+ ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+ netif_queue_stopped(dev),
+ priv->tx_head, priv->tx_tail);
/* XXX reset QP, etc. */
}
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- schedule_work(&priv->restart_task);
+ queue_work(ipoib_workqueue, &priv->restart_task);
}
static void ipoib_neigh_destructor(struct neighbour *n)
{
- struct ipoib_neigh *neigh = *to_ipoib_neigh(n);
+ struct ipoib_neigh *neigh;
struct ipoib_dev_priv *priv = netdev_priv(n->dev);
unsigned long flags;
+ struct ipoib_ah *ah = NULL;
ipoib_dbg(priv,
"neigh_destructor for %06x " IPOIB_GID_FMT "\n",
spin_lock_irqsave(&priv->lock, flags);
+ neigh = *to_ipoib_neigh(n);
if (neigh) {
if (neigh->ah)
- ipoib_put_ah(neigh->ah);
+ ah = neigh->ah;
list_del(&neigh->list);
*to_ipoib_neigh(n) = NULL;
kfree(neigh);
}
spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (ah)
+ ipoib_put_ah(ah);
}
static int ipoib_neigh_setup(struct neighbour *neigh)
/* Allocate RX/TX "rings" to hold queued skbs */
- priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, IPOIB_RX_RING_SIZE);
goto out;
}
- memset(priv->rx_ring, 0,
- IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
- priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
GFP_KERNEL);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
ca->name, IPOIB_TX_RING_SIZE);
goto out_rx_ring_cleanup;
}
- memset(priv->tx_ring, 0,
- IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
/* priv->tx_head & tx_tail are already 0 */
{
struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
- ipoib_delete_debug_file(dev);
+ ipoib_delete_debug_files(dev);
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
ipoib_ib_dev_cleanup(dev);
- if (priv->rx_ring) {
- kfree(priv->rx_ring);
- priv->rx_ring = NULL;
- }
+ kfree(priv->rx_ring);
+ kfree(priv->tx_ring);
- if (priv->tx_ring) {
- kfree(priv->tx_ring);
- priv->tx_ring = NULL;
- }
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
}
static void ipoib_setup(struct net_device *dev)
dev->watchdog_timeo = HZ;
- dev->rebuild_header = NULL;
- dev->set_mac_address = NULL;
- dev->header_cache_update = NULL;
-
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
/*
spin_lock_init(&priv->lock);
spin_lock_init(&priv->tx_lock);
- init_MUTEX(&priv->mcast_mutex);
- init_MUTEX(&priv->vlan_mutex);
+ mutex_init(&priv->mcast_mutex);
+ mutex_init(&priv->vlan_mutex);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
if (pkey < 0 || pkey > 0xffff)
return -EINVAL;
+ /*
+ * Set the full membership bit, so that we join the right
+ * broadcast group, etc.
+ */
+ pkey |= 0x8000;
+
ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
pkey);
goto alloc_mem_failed;
}
+ /*
+ * Set the full membership bit, so that we join the right
+ * broadcast group, etc.
+ */
+ priv->pkey |= 0x8000;
+
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
goto register_failed;
}
- if (ipoib_create_debug_file(priv->dev))
- goto debug_failed;
+ ipoib_create_debug_files(priv->dev);
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+ ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
ib_unregister_event_handler(&priv->event_handler);
+ flush_scheduled_work();
event_failed:
ipoib_dev_cleanup(priv->dev);
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
+ flush_scheduled_work();
unregister_netdev(priv->dev);
ipoib_dev_cleanup(priv->dev);
free_netdev(priv->dev);
}
+
+ kfree(dev_list);
}
static int __init ipoib_init_module(void)
return 0;
-err_fs:
- ipoib_unregister_debugfs();
-
err_wq:
destroy_workqueue(ipoib_workqueue);
+err_fs:
+ ipoib_unregister_debugfs();
+
return ret;
}
static void __exit ipoib_cleanup_module(void)
{
- ipoib_unregister_debugfs();
ib_unregister_client(&ipoib_client);
+ ipoib_unregister_debugfs();
destroy_workqueue(ipoib_workqueue);
}