X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Finfiniband%2Fhw%2Fmthca%2Fmthca_cq.c;h=205854e9c662809b49576b1ea8c3eb213e44ce72;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=2bf347b84c3142b3deb59de03a77b39cc7370118;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 2bf347b84..205854e9c 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,7 +39,7 @@ #include #include -#include +#include #include "mthca_dev.h" #include "mthca_cmd.h" @@ -53,21 +57,21 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mthca_cq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 error_eqn; /* Tavor only */ - u32 comp_eqn; - u32 pd; - u32 lkey; - u32 last_notified_index; - u32 solicit_producer_index; - u32 consumer_index; - u32 producer_index; - u32 cqn; - u32 ci_db; /* Arbel only */ - u32 state_db; /* Arbel only */ - u32 reserved; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; } __attribute__((packed)); #define MTHCA_CQ_STATUS_OK ( 0 << 28) @@ -106,31 +110,31 @@ enum { }; struct mthca_cqe { - u32 my_qpn; - u32 my_ee; - u32 rqpn; - u16 sl_g_mlpath; - u16 rlid; - u32 imm_etype_pkey_eec; - u32 byte_cnt; - u32 wqe; - u8 opcode; - u8 is_send; - u8 reserved; - u8 owner; + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + __be16 sl_g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; }; struct mthca_err_cqe { - u32 my_qpn; - u32 reserved1[3]; - u8 syndrome; - u8 reserved2; - u16 db_cnt; - u32 reserved3; - u32 wqe; - u8 opcode; - u8 reserved4[2]; - u8 owner; + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 vendor_err; + __be16 db_cnt; + u32 reserved2; + __be32 wqe; + u8 opcode; + u8 reserved3[2]; + u8 owner; }; #define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) @@ -146,24 +150,29 @@ struct mthca_err_cqe { #define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) #define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) -static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf, + int entry) { - if (cq->is_direct) - return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); + if (buf->is_direct) + return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE); else - return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE; } -static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i) +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + return get_cqe_from_buf(&cq->buf, entry); +} + +static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe) { - struct mthca_cqe *cqe = get_cqe(cq, i); return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; } static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) { - return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe); + return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe)); } static inline void set_cqe_hw(struct mthca_cqe *cqe) @@ -171,6 +180,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; } +static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), + be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), + be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); +} + /* * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index * should be correct before calling update_cons_index(). @@ -178,7 +198,7 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, int incr) { - u32 doorbell[2]; + __be32 doorbell[2]; if (mthca_is_memfree(dev)) { *cq->set_ci_db = cpu_to_be32(cq->cons_index); @@ -193,7 +213,7 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, } } -void mthca_cq_event(struct mthca_dev *dev, u32 cqn) +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn) { struct mthca_cq *cq; @@ -209,21 +229,52 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type) { struct mthca_cq *cq; - struct mthca_cqe *cqe; - int prod_index; - int nfreed = 0; + struct ib_event event; + + spin_lock(&dev->cq_table.lock); - spin_lock_irq(&dev->cq_table.lock); cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); if (cq) - atomic_inc(&cq->refcount); - spin_unlock_irq(&dev->cq_table.lock); + ++cq->refcount; + + spin_unlock(&dev->cq_table.lock); - if (!cq) + if (!cq) { + mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn); return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.cq = &cq->ibcq; + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&event, cq->ibcq.cq_context); + + spin_lock(&dev->cq_table.lock); + if (!--cq->refcount) + wake_up(&cq->wait); + spin_unlock(&dev->cq_table.lock); +} + +static inline int is_recv_cqe(struct mthca_cqe *cqe) +{ + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) + return !(cqe->opcode & 0x01); + else + return !(cqe->is_send & 0x80); +} + +void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, + struct mthca_srq *srq) +{ + struct mthca_cqe *cqe; + u32 prod_index; + int nfreed = 0; spin_lock_irq(&cq->lock); @@ -235,29 +286,28 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) * from our QP and therefore don't need to be checked. */ for (prod_index = cq->cons_index; - cqe_sw(cq, prod_index & cq->ibcq.cqe); + cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe)); ++prod_index) if (prod_index == cq->cons_index + cq->ibcq.cqe) break; if (0) mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", - qpn, cqn, cq->cons_index, prod_index); + qpn, cq->cqn, cq->cons_index, prod_index); /* * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. */ - while (prod_index > cq->cons_index) { - cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); - if (cqe->my_qpn == cpu_to_be32(qpn)) + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if (cqe->my_qpn == cpu_to_be32(qpn)) { + if (srq && is_recv_cqe(cqe)) + mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; - else if (nfreed) - memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & - cq->ibcq.cqe), - cqe, - MTHCA_CQ_ENTRY_SIZE); - --prod_index; + } else if (nfreed) + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); } if (nfreed) { @@ -267,34 +317,74 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) } spin_unlock_irq(&cq->lock); - if (atomic_dec_and_test(&cq->refcount)) - wake_up(&cq->wait); } -static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, - struct mthca_qp *qp, int wqe_index, int is_send, - struct mthca_err_cqe *cqe, - struct ib_wc *entry, int *free_cqe) +void mthca_cq_resize_copy_cqes(struct mthca_cq *cq) { - int err; - int dbd; - u32 new_wqe; + int i; - if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { - int j; + /* + * In Tavor mode, the hardware keeps the consumer and producer + * indices mod the CQ size. Since we might be making the CQ + * bigger, we need to deal with the case where the producer + * index wrapped around before the CQ was resized. + */ + if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) && + cq->ibcq.cqe < cq->resize_buf->cqe) { + cq->cons_index &= cq->ibcq.cqe; + if (cqe_sw(get_cqe(cq, cq->ibcq.cqe))) + cq->cons_index -= cq->ibcq.cqe + 1; + } - mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", - cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), - be32_to_cpu(cqe->wqe)); + for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i) + memcpy(get_cqe_from_buf(&cq->resize_buf->buf, + i & cq->resize_buf->cqe), + get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE); +} + +int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent) +{ + int ret; + int i; - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); + ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE, + MTHCA_MAX_DIRECT_CQ_SIZE, + &buf->queue, &buf->is_direct, + &dev->driver_pd, 1, &buf->mr); + if (ret) + return ret; + + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe_from_buf(buf, i)); + + return 0; +} + +void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe) +{ + mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue, + buf->is_direct, &buf->mr); +} + +static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct ib_wc *entry, int *free_cqe) +{ + int dbd; + __be32 new_wqe; + + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + mthca_dbg(dev, "local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), + cq->cqn, cq->cons_index); + dump_cqe(dev, cqe); } /* - * For completions in error, only work request ID, status (and - * freed resource count for RD) have to be set. + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. */ switch (cqe->syndrome) { case SYNDROME_LOCAL_LENGTH_ERR: @@ -356,9 +446,16 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } - err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); - if (err) - return err; + entry->vendor_err = cqe->vendor_err; + + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(dev)) + return; + + mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); /* * If we're at the end of the WQE chain, or we've used up our @@ -366,24 +463,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, * the next poll operation. */ if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) - return 0; + return; cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); cqe->wqe = new_wqe; cqe->syndrome = SYNDROME_WR_FLUSH_ERR; *free_cqe = 0; - - return 0; -} - -static void dump_cqe(struct mthca_cqe *cqe) -{ - int j; - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); } static inline int mthca_poll_one(struct mthca_dev *dev, @@ -414,8 +500,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe)); - - dump_cqe(cqe); + dump_cqe(dev, cqe); } is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == @@ -447,28 +532,32 @@ static inline int mthca_poll_one(struct mthca_dev *dev, >> wq->wqe_shift); entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = be32_to_cpu(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); } else { wq = &(*cur_qp)->rq; wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; entry->wr_id = (*cur_qp)->wrid[wqe_index]; } - if (wq->last_comp < wqe_index) - wq->tail += wqe_index - wq->last_comp; - else - wq->tail += wqe_index + wq->max - wq->last_comp; + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; - wq->last_comp = wqe_index; - - if (0) - mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n", - is_send ? "Send" : "Receive", - (*cur_qp)->qpn, wqe_index, wq->max); + wq->last_comp = wqe_index; + } if (is_error) { - err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, - (struct mthca_err_cqe *) cqe, - entry, &free_cqe); + handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); goto out; } @@ -562,11 +651,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, spin_lock_irqsave(&cq->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { + npolled = 0; +repoll: + while (npolled < num_entries) { err = mthca_poll_one(dev, cq, &qp, &freed, entry + npolled); if (err) break; + ++npolled; } if (freed) { @@ -574,6 +666,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, update_cons_index(dev, cq, freed); } + /* + * If a CQ resize is in progress and we discovered that the + * old buffer is empty, then peek in the new buffer, and if + * it's not empty, switch to the new buffer and continue + * polling there. + */ + if (unlikely(err == -EAGAIN && cq->resize_buf && + cq->resize_buf->state == CQ_RESIZE_READY)) { + /* + * In Tavor mode, the hardware keeps the producer + * index modulo the CQ size. Since we might be making + * the CQ bigger, we need to mask our consumer index + * using the size of the old CQ buffer before looking + * in the new CQ buffer. + */ + if (!mthca_is_memfree(dev)) + cq->cons_index &= cq->ibcq.cqe; + + if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf, + cq->cons_index & cq->resize_buf->cqe))) { + struct mthca_cq_buf tbuf; + int tcqe; + + tbuf = cq->buf; + tcqe = cq->ibcq.cqe; + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + cq->resize_buf->buf = tbuf; + cq->resize_buf->cqe = tcqe; + cq->resize_buf->state = CQ_RESIZE_SWAPPED; + + goto repoll; + } + } + spin_unlock_irqrestore(&cq->lock, flags); return err == 0 || err == -EAGAIN ? npolled : err; @@ -581,13 +709,13 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); - doorbell[1] = 0xffffffff; + doorbell[1] = (__force __be32) 0xffffffff; mthca_write64(doorbell, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, @@ -599,9 +727,9 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) { struct mthca_cq *cq = to_mcq(ibcq); - u32 doorbell[2]; + __be32 doorbell[2]; u32 sn; - u32 ci; + __be32 ci; sn = cq->arm_sn & 3; ci = cpu_to_be32(cq->cons_index); @@ -632,183 +760,73 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) return 0; } -static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) -{ - int i; - int size; - - if (cq->is_direct) - pci_free_consistent(dev->pdev, - (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - cq->queue.direct.buf, - pci_unmap_addr(&cq->queue.direct, - mapping)); - else { - size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; - for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) - if (cq->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - cq->queue.page_list[i].buf, - pci_unmap_addr(&cq->queue.page_list[i], - mapping)); - - kfree(cq->queue.page_list); - } -} - -static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, - struct mthca_cq *cq) -{ - int err = -ENOMEM; - int npages, shift; - u64 *dma_list = NULL; - dma_addr_t t; - int i; - - if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) { - cq->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, - size, &t); - if (!cq->queue.direct.buf) - return -ENOMEM; - - pci_unmap_addr_set(&cq->queue.direct, mapping, t); - - memset(cq->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - cq->is_direct = 0; - npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - shift = PAGE_SHIFT; - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - return -ENOMEM; - - cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list, - GFP_KERNEL); - if (!cq->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) - cq->queue.page_list[i].buf = NULL; - - for (i = 0; i < npages; ++i) { - cq->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); - if (!cq->queue.page_list[i].buf) - goto err_free; - - dma_list[i] = t; - pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t); - - memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE); - } - } - - err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, - dma_list, shift, npages, - 0, size, - MTHCA_MPT_FLAG_LOCAL_WRITE | - MTHCA_MPT_FLAG_LOCAL_READ, - &cq->mr); - if (err) - goto err_free; - - kfree(dma_list); - - return 0; - -err_free: - mthca_free_cq_buf(dev, cq); - -err_out: - kfree(dma_list); - - return err; -} - int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { - int size = nent * MTHCA_CQ_ENTRY_SIZE; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; u8 status; - int i; - - might_sleep(); - cq->ibcq.cqe = nent - 1; + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; cq->cqn = mthca_alloc(&dev->cq_table.alloc); if (cq->cqn == -1) return -ENOMEM; if (mthca_is_memfree(dev)) { - cq->arm_sn = 1; - err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); if (err) goto err_out; - err = -ENOMEM; + if (cq->is_kernel) { + cq->arm_sn = 1; - cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, - cq->cqn, &cq->set_ci_db); - if (cq->set_ci_db_index < 0) - goto err_out_icm; + err = -ENOMEM; - cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, - cq->cqn, &cq->arm_db); - if (cq->arm_db_index < 0) - goto err_out_ci; - } + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_mailbox; + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } + } - cq_context = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; - err = mthca_alloc_cq_buf(dev, size, cq); - if (err) - goto err_out_mailbox; + cq_context = mailbox->buf; - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); + if (cq->is_kernel) { + err = mthca_alloc_cq_buf(dev, &cq->buf, nent); + if (err) + goto err_out_mailbox; + } spin_lock_init(&cq->lock); - atomic_set(&cq->refcount, 1); + cq->refcount = 1; init_waitqueue_head(&cq->wait); memset(cq_context, 0, sizeof *cq_context); cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); - cq_context->start = cpu_to_be64(0); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | - dev->driver_uar.index); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); - cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); + cq_context->pd = cpu_to_be32(pdn); + cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); if (mthca_is_memfree(dev)) { @@ -816,7 +834,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->state_db = cpu_to_be32(cq->arm_db_index); } - err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); if (err) { mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); goto err_out_free_mr; @@ -840,22 +858,23 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq->cons_index = 0; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_free_mr: - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + if (cq->is_kernel) + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_out_mailbox: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); - if (mthca_is_memfree(dev)) +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); err_out_ci: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); err_out_icm: @@ -867,35 +886,43 @@ err_out: return err; } +static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq) +{ + int c; + + spin_lock_irq(&dev->cq_table.lock); + c = cq->refcount; + spin_unlock_irq(&dev->cq_table.lock); + + return c; +} + void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq) { - void *mailbox; + struct mthca_mailbox *mailbox; int err; u8 status; - might_sleep(); - - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { mthca_warn(dev, "No memory for mailbox to free CQ.\n"); return; } - err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); if (err) mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { - u32 *ctx = MAILBOX_ALIGN(mailbox); + __be32 *ctx = mailbox->buf; int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", - cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); + cq->cqn, cq->cons_index, + cq->is_kernel ? !!next_cqe_sw(cq) : 0); for (j = 0; j < 16; ++j) printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); } @@ -903,6 +930,7 @@ void mthca_free_cq(struct mthca_dev *dev, spin_lock_irq(&dev->cq_table.lock); mthca_array_clear(&dev->cq_table.cq, cq->cqn & (dev->limits.num_cqs - 1)); + --cq->refcount; spin_unlock_irq(&dev->cq_table.lock); if (dev->mthca_flags & MTHCA_FLAG_MSI_X) @@ -910,20 +938,19 @@ void mthca_free_cq(struct mthca_dev *dev, else synchronize_irq(dev->pdev->irq); - atomic_dec(&cq->refcount); - wait_event(cq->wait, !atomic_read(&cq->refcount)); - - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + wait_event(cq->wait, !get_cq_refcount(dev, cq)); - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); - mthca_table_put(dev, dev->cq_table.table, cq->cqn); + if (cq->is_kernel) { + mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } } + mthca_table_put(dev, dev->cq_table.table, cq->cqn); mthca_free(&dev->cq_table.alloc, cq->cqn); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } int __devinit mthca_init_cq_table(struct mthca_dev *dev) @@ -947,7 +974,7 @@ int __devinit mthca_init_cq_table(struct mthca_dev *dev) return err; } -void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev) +void mthca_cleanup_cq_table(struct mthca_dev *dev) { mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); mthca_alloc_cleanup(&dev->cq_table.alloc);