X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Finfiniband%2Fcore%2Fuser_mad.c;h=afe70a549c2faa439901a0f27b5578f1fa154c8d;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=9d912d6877ffc01ed55943b15a55ae7230f23a6a;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 9d912d687..afe70a549 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: user_mad.c 1389 2004-12-27 22:56:47Z roland $ + * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include @@ -47,8 +49,8 @@ #include #include -#include -#include +#include +#include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); @@ -62,18 +64,42 @@ enum { IB_UMAD_MINOR_BASE = 0 }; +/* + * Our lifetime rules for these structs are the following: each time a + * device special file is opened, we look up the corresponding struct + * ib_umad_port by minor in the umad_port[] table while holding the + * port_lock. If this lookup succeeds, we take a reference on the + * ib_umad_port's struct ib_umad_device while still holding the + * port_lock; if the lookup fails, we fail the open(). We drop these + * references in the corresponding close(). + * + * In addition to references coming from open character devices, there + * is one more reference to each ib_umad_device representing the + * module's reference taken when allocating the ib_umad_device in + * ib_umad_add_one(). + * + * When destroying an ib_umad_device, we clear all of its + * ib_umad_ports from umad_port[] while holding port_lock before + * dropping the module's reference to the ib_umad_device. This is + * always safe because any open() calls will either succeed and obtain + * a reference before we clear the umad_port[] entries, or fail after + * we clear the umad_port[] entries. + */ + struct ib_umad_port { - int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; - int sm_devnum; - struct cdev sm_dev; - struct class_device sm_class_dev; + struct cdev *sm_dev; + struct class_device *sm_class_dev; struct semaphore sm_sem; + struct rw_semaphore mutex; + struct list_head file_list; + struct ib_device *ib_dev; struct ib_umad_device *umad_dev; + int dev_num; u8 port_num; }; @@ -84,40 +110,60 @@ struct ib_umad_device { }; struct ib_umad_file { - struct ib_umad_port *port; - spinlock_t recv_lock; - struct list_head recv_list; - wait_queue_head_t recv_wait; - struct rw_semaphore agent_mutex; - struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; - struct ib_mr *mr[IB_UMAD_MAX_AGENTS]; + struct ib_umad_port *port; + struct list_head recv_list; + struct list_head port_list; + spinlock_t recv_lock; + wait_queue_head_t recv_wait; + struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; + int agents_dead; }; struct ib_umad_packet { - struct ib_user_mad mad; - struct ib_ah *ah; + struct ib_mad_send_buf *msg; + struct ib_mad_recv_wc *recv_wc; struct list_head list; - DECLARE_PCI_UNMAP_ADDR(mapping) + int length; + struct ib_user_mad mad; }; +static struct class *umad_class; + static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); -static spinlock_t map_lock; + +static DEFINE_SPINLOCK(port_lock); +static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + +/* caller must hold port->mutex at least for reading */ +static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) +{ + return file->agents_dead ? NULL : file->agent[id]; +} + static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) { int ret = 1; - down_read(&file->agent_mutex); - for (packet->mad.id = 0; - packet->mad.id < IB_UMAD_MAX_AGENTS; - packet->mad.id++) - if (agent == file->agent[packet->mad.id]) { + down_read(&file->port->mutex); + + for (packet->mad.hdr.id = 0; + packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; + packet->mad.hdr.id++) + if (agent == __get_agent(file, packet->mad.hdr.id)) { spin_lock_irq(&file->recv_lock); list_add_tail(&packet->list, &file->recv_list); spin_unlock_irq(&file->recv_lock); @@ -126,7 +172,7 @@ static int queue_packet(struct ib_umad_file *file, break; } - up_read(&file->agent_mutex); + up_read(&file->port->mutex); return ret; } @@ -135,22 +181,17 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *packet = - (void *) (unsigned long) send_wc->wr_id; + struct ib_umad_packet *packet = send_wc->send_buf->context[0]; - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - ib_destroy_ah(packet->ah); + ib_destroy_ah(packet->msg->ah); + ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - packet->mad.status = ETIMEDOUT; - + packet->length = IB_MGMT_MAD_HDR; + packet->mad.hdr.status = ETIMEDOUT; if (!queue_packet(file, agent, packet)) return; } - kfree(packet); } @@ -161,37 +202,106 @@ static void recv_handler(struct ib_mad_agent *agent, struct ib_umad_packet *packet; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) - goto out; + goto err1; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) - goto out; - - memset(packet, 0, sizeof *packet); - - memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data); - packet->mad.status = 0; - packet->mad.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.sl = mad_recv_wc->wc->sl; - packet->mad.path_bits = mad_recv_wc->wc->dlid_path_bits; - packet->mad.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); - if (packet->mad.grh_present) { + goto err1; + + packet->length = mad_recv_wc->mad_len; + packet->recv_wc = mad_recv_wc; + + packet->mad.hdr.status = 0; + packet->mad.hdr.length = sizeof (struct ib_user_mad) + + mad_recv_wc->mad_len; + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); + if (packet->mad.hdr.grh_present) { /* XXX parse GRH */ - packet->mad.gid_index = 0; - packet->mad.hop_limit = 0; - packet->mad.traffic_class = 0; - memset(packet->mad.gid, 0, 16); - packet->mad.flow_label = 0; + packet->mad.hdr.gid_index = 0; + packet->mad.hdr.hop_limit = 0; + packet->mad.hdr.traffic_class = 0; + memset(packet->mad.hdr.gid, 0, 16); + packet->mad.hdr.flow_label = 0; } if (queue_packet(file, agent, packet)) - kfree(packet); + goto err2; + return; -out: +err2: + kfree(packet); +err1: ib_free_recv_mad(mad_recv_wc); } +static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + struct ib_mad_recv_buf *recv_buf; + int left, seg_payload, offset, max_seg_payload; + + /* We need enough room to copy the first (or only) MAD segment. */ + recv_buf = &packet->recv_wc->recv_buf; + if ((packet->length <= sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + packet->length) || + (packet->length > sizeof (*recv_buf->mad) && + count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) + return -EFAULT; + + buf += sizeof (packet->mad); + seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + if (copy_to_user(buf, recv_buf->mad, seg_payload)) + return -EFAULT; + + if (seg_payload < packet->length) { + /* + * Multipacket RMPP MAD message. Copy remainder of message. + * Note that last segment may have a shorter payload. + */ + if (count < sizeof (packet->mad) + packet->length) { + /* + * The buffer is too small, return the first RMPP segment, + * which includes the RMPP message length. + */ + return -ENOSPC; + } + offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); + max_seg_payload = sizeof (struct ib_mad) - offset; + + for (left = packet->length - seg_payload, buf += seg_payload; + left; left -= seg_payload, buf += seg_payload) { + recv_buf = container_of(recv_buf->list.next, + struct ib_mad_recv_buf, list); + seg_payload = min(left, max_seg_payload); + if (copy_to_user(buf, ((void *) recv_buf->mad) + offset, + seg_payload)) + return -EFAULT; + } + } + return sizeof (packet->mad) + packet->length; +} + +static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, + size_t count) +{ + ssize_t size = sizeof (packet->mad) + packet->length; + + if (count < size) + return -EINVAL; + + if (copy_to_user(buf, &packet->mad, size)) + return -EFAULT; + + return size; +} + static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -222,15 +332,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); - if (copy_to_user(buf, &packet->mad, sizeof packet->mad)) - ret = -EFAULT; + if (packet->recv_wc) + ret = copy_recv_mad(buf, packet, count); else - ret = sizeof packet->mad; + ret = copy_send_mad(buf, packet, count); - kfree(packet); + if (ret < 0) { + /* Requeue packet */ + spin_lock_irq(&file->recv_lock); + list_add(&packet->list, &file->recv_list); + spin_unlock_irq(&file->recv_lock); + } else { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); + kfree(packet); + } return ret; } +static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) +{ + int left, seg; + + /* Copy class specific header */ + if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && + copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, + msg->hdr_len - IB_MGMT_RMPP_HDR)) + return -EFAULT; + + /* All headers are in place. Copy data segments. */ + for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; + seg++, left -= msg->seg_size, buf += msg->seg_size) { + if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, + min(left, msg->seg_size))) + return -EFAULT; + } + return 0; +} + static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -238,109 +377,130 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - }; + struct ib_ah *ah; + struct ib_rmpp_mad *rmpp_mad; u8 method; - u64 *tid; - int ret; + __be64 *tid; + int ret, data_len, hdr_len, copy_offset, rmpp_active; - if (count < sizeof (struct ib_user_mad)) + if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) return -EINVAL; - packet = kmalloc(sizeof *packet, GFP_KERNEL); + packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) { - kfree(packet); - return -EFAULT; + if (copy_from_user(&packet->mad, buf, + sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { + ret = -EFAULT; + goto err; } - if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id < 0 || + packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } - down_read(&file->agent_mutex); + down_read(&file->port->mutex); - agent = file->agent[packet->mad.id]; + agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; } + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); + ah_attr.sl = packet->mad.hdr.sl; + ah_attr.src_path_bits = packet->mad.hdr.path_bits; + ah_attr.port_num = file->port->port_num; + if (packet->mad.hdr.grh_present) { + ah_attr.ah_flags = IB_AH_GRH; + memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); + ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; + ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; + } + + ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { + copy_offset = IB_MGMT_MAD_HDR; + rmpp_active = 0; + } else { + copy_offset = IB_MGMT_RMPP_HDR; + rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + IB_MGMT_RMPP_FLAG_ACTIVE; + } + + data_len = count - sizeof (struct ib_user_mad) - hdr_len; + packet->msg = ib_create_send_mad(agent, + be32_to_cpu(packet->mad.hdr.qpn), + 0, rmpp_active, hdr_len, + data_len, GFP_KERNEL); + if (IS_ERR(packet->msg)) { + ret = PTR_ERR(packet->msg); + goto err_ah; + } + + packet->msg->ah = ah; + packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->retries = packet->mad.hdr.retries; + packet->msg->context[0] = packet; + + /* Copy MAD header. Any RMPP header is already in place. */ + memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); + buf += sizeof (struct ib_user_mad); + + if (!rmpp_active) { + if (copy_from_user(packet->msg->mad + copy_offset, + buf + copy_offset, + hdr_len + data_len - copy_offset)) { + ret = -EFAULT; + goto err_msg; + } + } else { + ret = copy_rmpp_mad(packet->msg, buf); + if (ret) + goto err_msg; + } + /* * If userspace is generating a request that will generate a * response, we need to make sure the high-order part of the * transaction ID matches the agent being used to send the * MAD. */ - method = ((struct ib_mad_hdr *) packet->mad.data)->method; + method = ((struct ib_mad_hdr *) packet->msg->mad)->method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->mad.data)->tid; + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = be16_to_cpu(packet->mad.lid); - ah_attr.sl = packet->mad.sl; - ah_attr.src_path_bits = packet->mad.path_bits; - ah_attr.port_num = file->port->port_num; - if (packet->mad.grh_present) { - ah_attr.ah_flags = IB_AH_GRH; - memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16); - ah_attr.grh.flow_label = packet->mad.flow_label; - ah_attr.grh.hop_limit = packet->mad.hop_limit; - ah_attr.grh.traffic_class = packet->mad.traffic_class; - } + ret = ib_post_send_mad(packet->msg, NULL); + if (ret) + goto err_msg; - packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(packet->ah)) { - ret = PTR_ERR(packet->ah); - goto err_up; - } - - gather_list.addr = dma_map_single(agent->device->dma_device, - packet->mad.data, - sizeof packet->mad.data, - DMA_TO_DEVICE); - gather_list.length = sizeof packet->mad.data; - gather_list.lkey = file->mr[packet->mad.id]->lkey; - pci_unmap_addr_set(packet, mapping, gather_list.addr); - - wr.wr.ud.mad_hdr = (struct ib_mad_hdr *) packet->mad.data; - wr.wr.ud.ah = packet->ah; - wr.wr.ud.remote_qpn = be32_to_cpu(packet->mad.qpn); - wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey); - wr.wr.ud.timeout_ms = packet->mad.timeout_ms; - - wr.wr_id = (unsigned long) packet; - - ret = ib_post_send_mad(agent, &wr, &bad_wr); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(packet, mapping), - sizeof packet->mad.data, - DMA_TO_DEVICE); - goto err_up; - } - - up_read(&file->agent_mutex); - - return sizeof packet->mad; + up_read(&file->port->mutex); + return count; +err_msg: + ib_free_send_mad(packet->msg); +err_ah: + ib_destroy_ah(ah); err_up: - up_read(&file->agent_mutex); - + up_read(&file->port->mutex); err: kfree(packet); return ret; @@ -369,7 +529,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) int agent_id; int ret; - down_write(&file->agent_mutex); + down_write(&file->port->mutex); + + if (!file->port->ib_dev) { + ret = -EPIPE; + goto out; + } if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { ret = -EFAULT; @@ -382,7 +547,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) - if (!file->agent[agent_id]) + if (!__get_agent(file, agent_id)) goto found; ret = -ENOMEM; @@ -399,69 +564,58 @@ found: agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, - 0, send_handler, recv_handler, file); + ureq.rmpp_version, + send_handler, recv_handler, file); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto out; } - file->agent[agent_id] = agent; - - file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(file->mr[agent_id])) { - ret = -ENOMEM; - goto err; - } - if (put_user(agent_id, (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; - goto err_mr; + ib_unregister_mad_agent(agent); + goto out; } + file->agent[agent_id] = agent; ret = 0; - goto out; - -err_mr: - ib_dereg_mr(file->mr[agent_id]); - -err: - file->agent[agent_id] = NULL; - ib_unregister_mad_agent(agent); out: - up_write(&file->agent_mutex); + up_write(&file->port->mutex); return ret; } static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) { + struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; - down_write(&file->agent_mutex); + if (get_user(id, (u32 __user *) arg)) + return -EFAULT; - if (get_user(id, (u32 __user *) arg)) { - ret = -EFAULT; - goto out; - } + down_write(&file->port->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) { + if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } - ib_dereg_mr(file->mr[id]); - ib_unregister_mad_agent(file->agent[id]); + agent = file->agent[id]; file->agent[id] = NULL; out: - up_write(&file->agent_mutex); + up_write(&file->port->mutex); + + if (agent) + ib_unregister_mad_agent(agent); + return ret; } -static long ib_umad_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) +static long ib_umad_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: @@ -475,84 +629,136 @@ static long ib_umad_ioctl(struct file *filp, static int ib_umad_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, dev); + struct ib_umad_port *port; struct ib_umad_file *file; + int ret = 0; - file = kmalloc(sizeof *file, GFP_KERNEL); - if (!file) - return -ENOMEM; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); - memset(file, 0, sizeof *file); + if (!port) + return -ENXIO; + + down_write(&port->mutex); + + if (!port->ib_dev) { + ret = -ENXIO; + goto out; + } + + file = kzalloc(sizeof *file, GFP_KERNEL); + if (!file) { + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + ret = -ENOMEM; + goto out; + } spin_lock_init(&file->recv_lock); - init_rwsem(&file->agent_mutex); INIT_LIST_HEAD(&file->recv_list); init_waitqueue_head(&file->recv_wait); file->port = port; filp->private_data = file; - return 0; + list_add_tail(&file->port_list, &port->file_list); + +out: + up_write(&port->mutex); + return ret; } static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; + struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; + int already_dead; int i; - for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) - if (file->agent[i]) { - ib_dereg_mr(file->mr[i]); - ib_unregister_mad_agent(file->agent[i]); - } + down_write(&file->port->mutex); + + already_dead = file->agents_dead; + file->agents_dead = 1; - list_for_each_entry_safe(packet, tmp, &file->recv_list, list) + list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { + if (packet->recv_wc) + ib_free_recv_mad(packet->recv_wc); kfree(packet); + } + + list_del(&file->port_list); + + downgrade_write(&file->port->mutex); + + if (!already_dead) + for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) + if (file->agent[i]) + ib_unregister_mad_agent(file->agent[i]); + + up_read(&file->port->mutex); kfree(file); + kref_put(&dev->ref, ib_umad_release_dev); return 0; } static struct file_operations umad_fops = { - .owner = THIS_MODULE, - .read = ib_umad_read, - .write = ib_umad_write, - .poll = ib_umad_poll, + .owner = THIS_MODULE, + .read = ib_umad_read, + .write = ib_umad_write, + .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, - .compat_ioctl = ib_umad_ioctl, - .open = ib_umad_open, - .release = ib_umad_close + .compat_ioctl = ib_umad_ioctl, + .open = ib_umad_open, + .release = ib_umad_close }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, sm_dev); + struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; + if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) - return -EAGAIN; + if (down_trylock(&port->sm_sem)) { + ret = -EAGAIN; + goto fail; + } } else { - if (down_interruptible(&port->sm_sem)) - return -ERESTARTSYS; + if (down_interruptible(&port->sm_sem)) { + ret = -ERESTARTSYS; + goto fail; + } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); - return ret; + goto fail; } filp->private_data = port; return 0; + +fail: + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) @@ -561,11 +767,17 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) struct ib_port_modify props = { .clr_port_cap_mask = IB_PORT_SM }; - int ret; + int ret = 0; + + down_write(&port->mutex); + if (port->ib_dev) + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); + up_write(&port->mutex); - ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); up(&port->sm_sem); + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } @@ -581,21 +793,13 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) - return print_dev_t(buf, port->dev.dev); - else - return print_dev_t(buf, port->sm_dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%s\n", port->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -604,38 +808,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%d\n", port->port_num); } static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static void ib_umad_release_dev(struct kref *ref) -{ - struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); - - kfree(dev); -} - -static void ib_umad_release_port(struct class_device *class_dev) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) { - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); - } else { - cdev_del(&port->sm_dev); - clear_bit(port->sm_devnum, dev_map); - } - - kref_put(&port->umad_dev->ref, ib_umad_release_dev); -} - -static struct class umad_class = { - .name = "infiniband_mad", - .release = ib_umad_release_port -}; - static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); @@ -645,91 +824,144 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - spin_lock(&map_lock); - port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->devnum >= IB_UMAD_MAX_PORTS) { - spin_unlock(&map_lock); + spin_lock(&port_lock); + port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->dev_num >= IB_UMAD_MAX_PORTS) { + spin_unlock(&port_lock); return -1; } - port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); - if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { - spin_unlock(&map_lock); - return -1; - } - set_bit(port->devnum, dev_map); - set_bit(port->sm_devnum, dev_map); - spin_unlock(&map_lock); + set_bit(port->dev_num, dev_map); + spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); + init_rwsem(&port->mutex); + INIT_LIST_HEAD(&port->file_list); - cdev_init(&port->dev, &umad_fops); - port->dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "umad%d", port->devnum); - if (cdev_add(&port->dev, base_dev + port->devnum, 1)) + port->dev = cdev_alloc(); + if (!port->dev) return -1; - - port->class_dev.class = &umad_class; - port->class_dev.dev = device->dma_device; - - snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); - - if (class_device_register(&port->class_dev)) + port->dev->owner = THIS_MODULE; + port->dev->ops = &umad_fops; + kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); + if (cdev_add(port->dev, base_dev + port->dev_num, 1)) goto err_cdev; - class_set_devdata(&port->class_dev, port); - kref_get(&port->umad_dev->ref); + port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, + device->dma_device, + "umad%d", port->dev_num); + if (IS_ERR(port->class_dev)) + goto err_cdev; - if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) - goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_port)) + if (class_device_create_file(port->class_dev, &class_device_attr_port)) goto err_class; - cdev_init(&port->sm_dev, &umad_sm_fops); - port->sm_dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); - if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) - return -1; - - port->sm_class_dev.class = &umad_class; - port->sm_class_dev.dev = device->dma_device; - - snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + port->sm_dev = cdev_alloc(); + if (!port->sm_dev) + goto err_class; + port->sm_dev->owner = THIS_MODULE; + port->sm_dev->ops = &umad_sm_fops; + kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num); + if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + goto err_sm_cdev; - if (class_device_register(&port->sm_class_dev)) + port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, + device->dma_device, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_class_dev)) goto err_sm_cdev; - class_set_devdata(&port->sm_class_dev, port); - kref_get(&port->umad_dev->ref); + class_set_devdata(port->class_dev, port); + class_set_devdata(port->sm_class_dev, port); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) - goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) goto err_sm_class; + spin_lock(&port_lock); + umad_port[port->dev_num] = port; + spin_unlock(&port_lock); + return 0; err_sm_class: - class_device_unregister(&port->sm_class_dev); + class_device_destroy(umad_class, port->sm_dev->dev); err_sm_cdev: - cdev_del(&port->sm_dev); + cdev_del(port->sm_dev); err_class: - class_device_unregister(&port->class_dev); + class_device_destroy(umad_class, port->dev->dev); err_cdev: - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); + cdev_del(port->dev); + clear_bit(port->dev_num, dev_map); return -1; } +static void ib_umad_kill_port(struct ib_umad_port *port) +{ + struct ib_umad_file *file; + int id; + + class_set_devdata(port->class_dev, NULL); + class_set_devdata(port->sm_class_dev, NULL); + + class_device_destroy(umad_class, port->dev->dev); + class_device_destroy(umad_class, port->sm_dev->dev); + + cdev_del(port->dev); + cdev_del(port->sm_dev); + + spin_lock(&port_lock); + umad_port[port->dev_num] = NULL; + spin_unlock(&port_lock); + + down_write(&port->mutex); + + port->ib_dev = NULL; + + /* + * Now go through the list of files attached to this port and + * unregister all of their MAD agents. We need to hold + * port->mutex while doing this to avoid racing with + * ib_umad_close(), but we can't hold the mutex for writing + * while calling ib_unregister_mad_agent(), since that might + * deadlock by calling back into queue_packet(). So we + * downgrade our lock to a read lock, and then drop and + * reacquire the write lock for the next iteration. + * + * We do list_del_init() on the file's list_head so that the + * list_del in ib_umad_close() is still OK, even after the + * file is removed from the list. + */ + while (!list_empty(&port->file_list)) { + file = list_entry(port->file_list.next, struct ib_umad_file, + port_list); + + file->agents_dead = 1; + list_del_init(&file->port_list); + + downgrade_write(&port->mutex); + + for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) + if (file->agent[id]) + ib_unregister_mad_agent(file->agent[id]); + + up_read(&port->mutex); + down_write(&port->mutex); + } + + up_write(&port->mutex); + + clear_bit(port->dev_num, dev_map); +} + static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; @@ -742,15 +974,12 @@ static void ib_umad_add_one(struct ib_device *device) e = device->phys_port_cnt; } - umad_dev = kmalloc(sizeof *umad_dev + + umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; - memset(umad_dev, 0, sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port)); - kref_init(&umad_dev->ref); umad_dev->start_port = s; @@ -768,10 +997,8 @@ static void ib_umad_add_one(struct ib_device *device) return; err: - while (--i >= s) { - class_device_unregister(&umad_dev->port[i - s].class_dev); - class_device_unregister(&umad_dev->port[i - s].sm_class_dev); - } + while (--i >= s) + ib_umad_kill_port(&umad_dev->port[i - s]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -784,10 +1011,8 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { - class_device_unregister(&umad_dev->port[i].class_dev); - class_device_unregister(&umad_dev->port[i].sm_class_dev); - } + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -796,8 +1021,6 @@ static int __init ib_umad_init(void) { int ret; - spin_lock_init(&map_lock); - ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -805,13 +1028,14 @@ static int __init ib_umad_init(void) goto out; } - ret = class_register(&umad_class); - if (ret) { + umad_class = class_create(THIS_MODULE, "infiniband_mad"); + if (IS_ERR(umad_class)) { + ret = PTR_ERR(umad_class); printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } - ret = class_create_file(&umad_class, &class_attr_abi_version); + ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -826,7 +1050,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_unregister(&umad_class); + class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); @@ -838,7 +1062,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_unregister(&umad_class); + class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); }