X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Finfiniband%2Fulp%2Fipoib%2Fipoib_main.c;h=cb078a7d0bf5b86551adf812fbc93724883616f8;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=6f60abbaebd5602e472f625edc144af0d465aca8;hpb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;p=linux-2.6.git diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6f60abbae..cb078a7d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,22 +36,32 @@ #include "ipoib.h" -#include #include #include #include #include +#include #include /* For ARPHRD_xxx */ #include #include +#include + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); +int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; +int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; + +module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); +MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); +module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); +MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -57,6 +69,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif +struct ipoib_path_iter { + struct net_device *dev; + struct ipoib_path path; +}; + static const u8 ipv4_bcast_addr[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, @@ -88,14 +105,16 @@ int ipoib_open(struct net_device *dev) if (ipoib_ib_dev_open(dev)) return -EINVAL; - if (ipoib_ib_dev_up(dev)) + if (ipoib_ib_dev_up(dev)) { + ipoib_ib_dev_stop(dev); return -EINVAL; + } if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring up any child interfaces too */ - down(&priv->vlan_mutex); + mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) { int flags; @@ -105,7 +124,7 @@ int ipoib_open(struct net_device *dev) dev_change_flags(cpriv->dev, flags | IFF_UP); } - up(&priv->vlan_mutex); + mutex_unlock(&priv->vlan_mutex); } netif_start_queue(dev); @@ -123,14 +142,20 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - ipoib_ib_dev_down(dev); + /* + * Now flush workqueue to make sure a scheduled task doesn't + * bring our internal state back up. + */ + flush_workqueue(ipoib_workqueue); + + ipoib_ib_dev_down(dev, 1); ipoib_ib_dev_stop(dev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; /* Bring down any child interfaces too */ - down(&priv->vlan_mutex); + mutex_lock(&priv->vlan_mutex); list_for_each_entry(cpriv, &priv->child_intfs, list) { int flags; @@ -140,7 +165,7 @@ static int ipoib_stop(struct net_device *dev) dev_change_flags(cpriv->dev, flags & ~IFF_UP); } - up(&priv->vlan_mutex); + mutex_unlock(&priv->vlan_mutex); } return 0; @@ -236,9 +261,8 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) */ if (neigh->ah) ipoib_put_ah(neigh->ah); - *to_ipoib_neigh(neigh->neighbour) = NULL; - neigh->neighbour->ops->destructor = NULL; - kfree(neigh); + + ipoib_neigh_free(neigh); } spin_unlock_irqrestore(&priv->lock, flags); @@ -249,14 +273,71 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) kfree(path); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + +struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) +{ + struct ipoib_path_iter *iter; + + iter = kmalloc(sizeof *iter, GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + memset(iter->path.pathrec.dgid.raw, 0, 16); + + if (ipoib_path_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int ipoib_path_iter_next(struct ipoib_path_iter *iter) +{ + struct ipoib_dev_priv *priv = netdev_priv(iter->dev); + struct rb_node *n; + struct ipoib_path *path; + int ret = 1; + + spin_lock_irq(&priv->lock); + + n = rb_first(&priv->path_tree); + + while (n) { + path = rb_entry(n, struct ipoib_path, rb_node); + + if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw, + sizeof (union ib_gid)) < 0) { + iter->path = *path; + ret = 0; + break; + } + + n = rb_next(n); + } + + spin_unlock_irq(&priv->lock); + + return ret; +} + +void ipoib_path_iter_read(struct ipoib_path_iter *iter, + struct ipoib_path *path) +{ + *path = iter->path; +} + +#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); - unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); list_splice(&priv->path_list, &remove_list); INIT_LIST_HEAD(&priv->path_list); @@ -264,14 +345,15 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry(path, &remove_list, list) rb_erase(&path->rb_node, &priv->path_tree); - spin_unlock_irqrestore(&priv->lock, flags); - list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); + spin_unlock_irq(&priv->lock); wait_for_completion(&path->done); path_free(dev, path); + spin_lock_irq(&priv->lock); } + spin_unlock_irq(&priv->lock); } static void path_rec_completion(int status, @@ -300,16 +382,9 @@ static void path_rec_completion(int status, struct ib_ah_attr av = { .dlid = be16_to_cpu(pathrec->dlid), .sl = pathrec->sl, - .port_num = priv->port + .port_num = priv->port, + .static_rate = pathrec->rate }; - int path_rate = ib_sa_rate_enum_to_int(pathrec->rate); - - if (path_rate > 0 && priv->local_rate > path_rate) - av.static_rate = (priv->local_rate - 1) / path_rate; - - ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n", - av.static_rate, priv->local_rate, - ib_sa_rate_enum_to_int(pathrec->rate)); ah = ipoib_create_ah(dev, priv->pd, &av); } @@ -334,9 +409,9 @@ static void path_rec_completion(int status, while ((skb = __skb_dequeue(&neigh->queue))) __skb_queue_tail(&skqueue, skb); } - } else - path->query = NULL; + } + path->query = NULL; complete(&path->done); spin_unlock_irqrestore(&priv->lock, flags); @@ -355,19 +430,15 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; - path = kmalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; - path->dev = dev; - path->pathrec.dlid = 0; - path->ah = NULL; + path->dev = dev; skb_queue_head_init(&path->queue); INIT_LIST_HEAD(&path->neigh_list); - path->query = NULL; - init_completion(&path->done); memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); path->pathrec.sgid = priv->local_gid; @@ -385,6 +456,8 @@ static int path_rec_start(struct net_device *dev, ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(path->pathrec.dgid)); + init_completion(&path->done); + path->query_id = ib_sa_path_rec_get(priv->ca, priv->port, &path->pathrec, @@ -410,7 +483,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_path *path; struct ipoib_neigh *neigh; - neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + neigh = ipoib_neigh_alloc(skb->dst->neighbour); if (!neigh) { ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -418,8 +491,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) } skb_queue_head_init(&neigh->queue); - neigh->neighbour = skb->dst->neighbour; - *to_ipoib_neigh(skb->dst->neighbour) = neigh; /* * We can only be called from ipoib_start_xmit, so we're @@ -432,14 +503,14 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) path = path_rec_create(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4)); if (!path) - goto err; + goto err_path; __path_add(dev, path); } list_add_tail(&neigh->list, &path->neigh_list); - if (path->pathrec.dlid) { + if (path->ah) { kref_get(&path->ah->ref); neigh->ah = path->ah; @@ -447,33 +518,27 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) be32_to_cpup((__be32 *) skb->dst->neighbour->ha)); } else { neigh->ah = NULL; - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - __skb_queue_tail(&neigh->queue, skb); - } else { - ++priv->stats.tx_dropped; - dev_kfree_skb_any(skb); - } + __skb_queue_tail(&neigh->queue, skb); if (!path->query && path_rec_start(dev, path)) - goto err; + goto err_list; } spin_unlock(&priv->lock); return; -err: - *to_ipoib_neigh(skb->dst->neighbour) = NULL; +err_list: list_del(&neigh->list); - neigh->neighbour->ops->destructor = NULL; - kfree(neigh); +err_path: + ipoib_neigh_free(neigh); ++priv->stats.tx_dropped; dev_kfree_skb_any(skb); spin_unlock(&priv->lock); } -static void path_lookup(struct sk_buff *skb, struct net_device *dev) +static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); @@ -525,7 +590,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, return; } - if (path->pathrec.dlid) { + if (path->ah) { ipoib_dbg(priv, "Send unicast ARP to %04x\n", be16_to_cpu(path->pathrec.dlid)); @@ -550,11 +615,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); + if (!spin_trylock_irqsave(&priv->tx_lock, flags)) return NETDEV_TX_LOCKED; - } /* * Check if our queue is stopped. Since we have the LLTX bit @@ -568,7 +630,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->dst && skb->dst->neighbour) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { - path_lookup(skb, dev); + ipoib_path_lookup(skb, dev); goto out; } @@ -600,14 +662,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); } else { - /* unicast GID -- should be ARP reply */ + /* unicast GID -- should be ARP or RARP reply */ - if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) { + if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && + (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x " IPOIB_GID_FMT "\n", skb->dst ? "neigh" : "dst", - be16_to_cpup((u16 *) skb->data), - be32_to_cpup((u32 *) phdr->hwaddr), + be16_to_cpup((__be16 *) skb->data), + be32_to_cpup((__be32 *) phdr->hwaddr), IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); dev_kfree_skb_any(skb); ++priv->stats.tx_dropped; @@ -635,8 +698,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } @@ -657,7 +723,7 @@ static int ipoib_hard_header(struct sk_buff *skb, * destination address onto the front of the skb so we can * figure out where to send the packet later. */ - if (!skb->dst || !skb->dst->neighbour) { + if ((!skb->dst || !skb->dst->neighbour) && daddr) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); @@ -670,7 +736,12 @@ static void ipoib_set_mcast_list(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - schedule_work(&priv->restart_task); + if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { + ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set"); + return; + } + + queue_work(ipoib_workqueue, &priv->restart_task); } static void ipoib_neigh_destructor(struct neighbour *n) @@ -692,8 +763,7 @@ static void ipoib_neigh_destructor(struct neighbour *n) if (neigh->ah) ah = neigh->ah; list_del(&neigh->list); - *to_ipoib_neigh(n) = NULL; - kfree(neigh); + ipoib_neigh_free(neigh); } spin_unlock_irqrestore(&priv->lock, flags); @@ -702,21 +772,29 @@ static void ipoib_neigh_destructor(struct neighbour *n) ipoib_put_ah(ah); } -static int ipoib_neigh_setup(struct neighbour *neigh) +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) { - /* - * Is this kosher? I can't find anybody in the kernel that - * sets neigh->destructor, so we should be able to set it here - * without trouble. - */ - neigh->ops->destructor = ipoib_neigh_destructor; + struct ipoib_neigh *neigh; - return 0; + neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); + if (!neigh) + return NULL; + + neigh->neighbour = neighbour; + *to_ipoib_neigh(neighbour) = neigh; + + return neigh; +} + +void ipoib_neigh_free(struct ipoib_neigh *neigh) +{ + *to_ipoib_neigh(neigh->neighbour) = NULL; + kfree(neigh); } static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) { - parms->neigh_setup = ipoib_neigh_setup; + parms->neigh_destructor = ipoib_neigh_destructor; return 0; } @@ -726,26 +804,21 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) struct ipoib_dev_priv *priv = netdev_priv(dev); /* Allocate RX/TX "rings" to hold queued skbs */ - - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", - ca->name, IPOIB_RX_RING_SIZE); + ca->name, ipoib_recvq_size); goto out; } - memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", - ca->name, IPOIB_TX_RING_SIZE); + ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } - memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); /* priv->tx_head & tx_tail are already 0 */ @@ -768,7 +841,7 @@ void ipoib_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; - ipoib_delete_debug_file(dev); + ipoib_delete_debug_files(dev); /* Delete any child interfaces first */ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { @@ -779,15 +852,11 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); - if (priv->rx_ring) { - kfree(priv->rx_ring); - priv->rx_ring = NULL; - } + kfree(priv->rx_ring); + kfree(priv->tx_ring); - if (priv->tx_ring) { - kfree(priv->tx_ring); - priv->tx_ring = NULL; - } + priv->rx_ring = NULL; + priv->tx_ring = NULL; } static void ipoib_setup(struct net_device *dev) @@ -806,10 +875,6 @@ static void ipoib_setup(struct net_device *dev) dev->watchdog_timeo = HZ; - dev->rebuild_header = NULL; - dev->set_mac_address = NULL; - dev->header_cache_update = NULL; - dev->flags |= IFF_BROADCAST | IFF_MULTICAST; /* @@ -819,7 +884,7 @@ static void ipoib_setup(struct net_device *dev) dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; - dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; + dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; /* MTU will be reset when mcast join happens */ @@ -837,8 +902,8 @@ static void ipoib_setup(struct net_device *dev) spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - init_MUTEX(&priv->mcast_mutex); - init_MUTEX(&priv->vlan_mutex); + mutex_init(&priv->mcast_mutex); + mutex_init(&priv->vlan_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); @@ -885,6 +950,12 @@ static ssize_t create_child(struct class_device *cdev, if (pkey < 0 || pkey > 0xffff) return -EINVAL; + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + pkey |= 0x8000; + ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev), pkey); @@ -937,6 +1008,12 @@ static struct net_device *ipoib_add_port(const char *format, goto alloc_mem_failed; } + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; @@ -973,8 +1050,7 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } - if (ipoib_create_debug_file(priv->dev)) - goto debug_failed; + ipoib_create_debug_files(priv->dev); if (ipoib_add_pkey_attr(priv->dev)) goto sysfs_failed; @@ -988,13 +1064,12 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_file(priv->dev); - -debug_failed: + ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: ib_unregister_event_handler(&priv->event_handler); + flush_scheduled_work(); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1047,17 +1122,28 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); + flush_scheduled_work(); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); free_netdev(priv->dev); } + + kfree(dev_list); } static int __init ipoib_init_module(void) { int ret; + ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); + ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); + ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); + + ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); + ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); + ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); + ret = ipoib_register_debugfs(); if (ret) return ret;