Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / drivers / infiniband / hw / ipath / ipath_uc.c
1 /*
2  * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include "ipath_verbs.h"
35 #include "ipath_common.h"
36
37 /* cut down ridiculously long IB macro names */
38 #define OP(x) IB_OPCODE_UC_##x
39
40 static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
41                                struct ib_wc *wc)
42 {
43         if (++qp->s_last == qp->s_size)
44                 qp->s_last = 0;
45         if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
46             (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47                 wc->wr_id = wqe->wr.wr_id;
48                 wc->status = IB_WC_SUCCESS;
49                 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
50                 wc->vendor_err = 0;
51                 wc->byte_len = wqe->length;
52                 wc->qp_num = qp->ibqp.qp_num;
53                 wc->src_qp = qp->remote_qpn;
54                 wc->pkey_index = 0;
55                 wc->slid = qp->remote_ah_attr.dlid;
56                 wc->sl = qp->remote_ah_attr.sl;
57                 wc->dlid_path_bits = 0;
58                 wc->port_num = 0;
59                 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60         }
61         wqe = get_swqe_ptr(qp, qp->s_last);
62 }
63
64 /**
65  * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
66  * @qp: a pointer to the QP
67  * @ohdr: a pointer to the IB header being constructed
68  * @pmtu: the path MTU
69  * @bth0p: pointer to the BTH opcode word
70  * @bth2p: pointer to the BTH PSN word
71  *
72  * Return 1 if constructed; otherwise, return 0.
73  * Note the QP s_lock must be held and interrupts disabled.
74  */
75 int ipath_make_uc_req(struct ipath_qp *qp,
76                       struct ipath_other_headers *ohdr,
77                       u32 pmtu, u32 *bth0p, u32 *bth2p)
78 {
79         struct ipath_swqe *wqe;
80         u32 hwords;
81         u32 bth0;
82         u32 len;
83         struct ib_wc wc;
84
85         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
86                 goto done;
87
88         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
89         hwords = 5;
90         bth0 = 0;
91
92         /* Get the next send request. */
93         wqe = get_swqe_ptr(qp, qp->s_last);
94         switch (qp->s_state) {
95         default:
96                 /*
97                  * Signal the completion of the last send
98                  * (if there is one).
99                  */
100                 if (qp->s_last != qp->s_tail)
101                         complete_last_send(qp, wqe, &wc);
102
103                 /* Check if send work queue is empty. */
104                 if (qp->s_tail == qp->s_head)
105                         goto done;
106                 /*
107                  * Start a new request.
108                  */
109                 qp->s_psn = wqe->psn = qp->s_next_psn;
110                 qp->s_sge.sge = wqe->sg_list[0];
111                 qp->s_sge.sg_list = wqe->sg_list + 1;
112                 qp->s_sge.num_sge = wqe->wr.num_sge;
113                 qp->s_len = len = wqe->length;
114                 switch (wqe->wr.opcode) {
115                 case IB_WR_SEND:
116                 case IB_WR_SEND_WITH_IMM:
117                         if (len > pmtu) {
118                                 qp->s_state = OP(SEND_FIRST);
119                                 len = pmtu;
120                                 break;
121                         }
122                         if (wqe->wr.opcode == IB_WR_SEND)
123                                 qp->s_state = OP(SEND_ONLY);
124                         else {
125                                 qp->s_state =
126                                         OP(SEND_ONLY_WITH_IMMEDIATE);
127                                 /* Immediate data comes after the BTH */
128                                 ohdr->u.imm_data = wqe->wr.imm_data;
129                                 hwords += 1;
130                         }
131                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
132                                 bth0 |= 1 << 23;
133                         break;
134
135                 case IB_WR_RDMA_WRITE:
136                 case IB_WR_RDMA_WRITE_WITH_IMM:
137                         ohdr->u.rc.reth.vaddr =
138                                 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
139                         ohdr->u.rc.reth.rkey =
140                                 cpu_to_be32(wqe->wr.wr.rdma.rkey);
141                         ohdr->u.rc.reth.length = cpu_to_be32(len);
142                         hwords += sizeof(struct ib_reth) / 4;
143                         if (len > pmtu) {
144                                 qp->s_state = OP(RDMA_WRITE_FIRST);
145                                 len = pmtu;
146                                 break;
147                         }
148                         if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
149                                 qp->s_state = OP(RDMA_WRITE_ONLY);
150                         else {
151                                 qp->s_state =
152                                         OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
153                                 /* Immediate data comes after the RETH */
154                                 ohdr->u.rc.imm_data = wqe->wr.imm_data;
155                                 hwords += 1;
156                                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
157                                         bth0 |= 1 << 23;
158                         }
159                         break;
160
161                 default:
162                         goto done;
163                 }
164                 if (++qp->s_tail >= qp->s_size)
165                         qp->s_tail = 0;
166                 break;
167
168         case OP(SEND_FIRST):
169                 qp->s_state = OP(SEND_MIDDLE);
170                 /* FALLTHROUGH */
171         case OP(SEND_MIDDLE):
172                 len = qp->s_len;
173                 if (len > pmtu) {
174                         len = pmtu;
175                         break;
176                 }
177                 if (wqe->wr.opcode == IB_WR_SEND)
178                         qp->s_state = OP(SEND_LAST);
179                 else {
180                         qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
181                         /* Immediate data comes after the BTH */
182                         ohdr->u.imm_data = wqe->wr.imm_data;
183                         hwords += 1;
184                 }
185                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
186                         bth0 |= 1 << 23;
187                 break;
188
189         case OP(RDMA_WRITE_FIRST):
190                 qp->s_state = OP(RDMA_WRITE_MIDDLE);
191                 /* FALLTHROUGH */
192         case OP(RDMA_WRITE_MIDDLE):
193                 len = qp->s_len;
194                 if (len > pmtu) {
195                         len = pmtu;
196                         break;
197                 }
198                 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
199                         qp->s_state = OP(RDMA_WRITE_LAST);
200                 else {
201                         qp->s_state =
202                                 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
203                         /* Immediate data comes after the BTH */
204                         ohdr->u.imm_data = wqe->wr.imm_data;
205                         hwords += 1;
206                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
207                                 bth0 |= 1 << 23;
208                 }
209                 break;
210         }
211         qp->s_len -= len;
212         qp->s_hdrwords = hwords;
213         qp->s_cur_sge = &qp->s_sge;
214         qp->s_cur_size = len;
215         *bth0p = bth0 | (qp->s_state << 24);
216         *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
217         return 1;
218
219 done:
220         return 0;
221 }
222
223 /**
224  * ipath_uc_rcv - handle an incoming UC packet
225  * @dev: the device the packet came in on
226  * @hdr: the header of the packet
227  * @has_grh: true if the packet has a GRH
228  * @data: the packet data
229  * @tlen: the length of the packet
230  * @qp: the QP for this packet.
231  *
232  * This is called from ipath_qp_rcv() to process an incoming UC packet
233  * for the given QP.
234  * Called at interrupt level.
235  */
236 void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
237                   int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
238 {
239         struct ipath_other_headers *ohdr;
240         int opcode;
241         u32 hdrsize;
242         u32 psn;
243         u32 pad;
244         struct ib_wc wc;
245         u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
246         struct ib_reth *reth;
247         int header_in_data;
248
249         /* Check for GRH */
250         if (!has_grh) {
251                 ohdr = &hdr->u.oth;
252                 hdrsize = 8 + 12;       /* LRH + BTH */
253                 psn = be32_to_cpu(ohdr->bth[2]);
254                 header_in_data = 0;
255         } else {
256                 ohdr = &hdr->u.l.oth;
257                 hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
258                 /*
259                  * The header with GRH is 60 bytes and the
260                  * core driver sets the eager header buffer
261                  * size to 56 bytes so the last 4 bytes of
262                  * the BTH header (PSN) is in the data buffer.
263                  */
264                 header_in_data =
265                         ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
266                 if (header_in_data) {
267                         psn = be32_to_cpu(((__be32 *) data)[0]);
268                         data += sizeof(__be32);
269                 } else
270                         psn = be32_to_cpu(ohdr->bth[2]);
271         }
272         /*
273          * The opcode is in the low byte when its in network order
274          * (top byte when in host order).
275          */
276         opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
277
278         wc.imm_data = 0;
279         wc.wc_flags = 0;
280
281         /* Compare the PSN verses the expected PSN. */
282         if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
283                 /*
284                  * Handle a sequence error.
285                  * Silently drop any current message.
286                  */
287                 qp->r_psn = psn;
288         inv:
289                 qp->r_state = OP(SEND_LAST);
290                 switch (opcode) {
291                 case OP(SEND_FIRST):
292                 case OP(SEND_ONLY):
293                 case OP(SEND_ONLY_WITH_IMMEDIATE):
294                         goto send_first;
295
296                 case OP(RDMA_WRITE_FIRST):
297                 case OP(RDMA_WRITE_ONLY):
298                 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
299                         goto rdma_first;
300
301                 default:
302                         dev->n_pkt_drops++;
303                         goto done;
304                 }
305         }
306
307         /* Check for opcode sequence errors. */
308         switch (qp->r_state) {
309         case OP(SEND_FIRST):
310         case OP(SEND_MIDDLE):
311                 if (opcode == OP(SEND_MIDDLE) ||
312                     opcode == OP(SEND_LAST) ||
313                     opcode == OP(SEND_LAST_WITH_IMMEDIATE))
314                         break;
315                 goto inv;
316
317         case OP(RDMA_WRITE_FIRST):
318         case OP(RDMA_WRITE_MIDDLE):
319                 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
320                     opcode == OP(RDMA_WRITE_LAST) ||
321                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
322                         break;
323                 goto inv;
324
325         default:
326                 if (opcode == OP(SEND_FIRST) ||
327                     opcode == OP(SEND_ONLY) ||
328                     opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
329                     opcode == OP(RDMA_WRITE_FIRST) ||
330                     opcode == OP(RDMA_WRITE_ONLY) ||
331                     opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
332                         break;
333                 goto inv;
334         }
335
336         /* OK, process the packet. */
337         switch (opcode) {
338         case OP(SEND_FIRST):
339         case OP(SEND_ONLY):
340         case OP(SEND_ONLY_WITH_IMMEDIATE):
341         send_first:
342                 if (qp->r_reuse_sge) {
343                         qp->r_reuse_sge = 0;
344                         qp->r_sge = qp->s_rdma_sge;
345                 } else if (!ipath_get_rwqe(qp, 0)) {
346                         dev->n_pkt_drops++;
347                         goto done;
348                 }
349                 /* Save the WQE so we can reuse it in case of an error. */
350                 qp->s_rdma_sge = qp->r_sge;
351                 qp->r_rcv_len = 0;
352                 if (opcode == OP(SEND_ONLY))
353                         goto send_last;
354                 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
355                         goto send_last_imm;
356                 /* FALLTHROUGH */
357         case OP(SEND_MIDDLE):
358                 /* Check for invalid length PMTU or posted rwqe len. */
359                 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
360                         qp->r_reuse_sge = 1;
361                         dev->n_pkt_drops++;
362                         goto done;
363                 }
364                 qp->r_rcv_len += pmtu;
365                 if (unlikely(qp->r_rcv_len > qp->r_len)) {
366                         qp->r_reuse_sge = 1;
367                         dev->n_pkt_drops++;
368                         goto done;
369                 }
370                 ipath_copy_sge(&qp->r_sge, data, pmtu);
371                 break;
372
373         case OP(SEND_LAST_WITH_IMMEDIATE):
374         send_last_imm:
375                 if (header_in_data) {
376                         wc.imm_data = *(__be32 *) data;
377                         data += sizeof(__be32);
378                 } else {
379                         /* Immediate data comes after BTH */
380                         wc.imm_data = ohdr->u.imm_data;
381                 }
382                 hdrsize += 4;
383                 wc.wc_flags = IB_WC_WITH_IMM;
384                 /* FALLTHROUGH */
385         case OP(SEND_LAST):
386         send_last:
387                 /* Get the number of bytes the message was padded by. */
388                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
389                 /* Check for invalid length. */
390                 /* XXX LAST len should be >= 1 */
391                 if (unlikely(tlen < (hdrsize + pad + 4))) {
392                         qp->r_reuse_sge = 1;
393                         dev->n_pkt_drops++;
394                         goto done;
395                 }
396                 /* Don't count the CRC. */
397                 tlen -= (hdrsize + pad + 4);
398                 wc.byte_len = tlen + qp->r_rcv_len;
399                 if (unlikely(wc.byte_len > qp->r_len)) {
400                         qp->r_reuse_sge = 1;
401                         dev->n_pkt_drops++;
402                         goto done;
403                 }
404                 /* XXX Need to free SGEs */
405         last_imm:
406                 ipath_copy_sge(&qp->r_sge, data, tlen);
407                 wc.wr_id = qp->r_wr_id;
408                 wc.status = IB_WC_SUCCESS;
409                 wc.opcode = IB_WC_RECV;
410                 wc.vendor_err = 0;
411                 wc.qp_num = qp->ibqp.qp_num;
412                 wc.src_qp = qp->remote_qpn;
413                 wc.pkey_index = 0;
414                 wc.slid = qp->remote_ah_attr.dlid;
415                 wc.sl = qp->remote_ah_attr.sl;
416                 wc.dlid_path_bits = 0;
417                 wc.port_num = 0;
418                 /* Signal completion event if the solicited bit is set. */
419                 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
420                                (ohdr->bth[0] &
421                                 __constant_cpu_to_be32(1 << 23)) != 0);
422                 break;
423
424         case OP(RDMA_WRITE_FIRST):
425         case OP(RDMA_WRITE_ONLY):
426         case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
427         rdma_first:
428                 /* RETH comes after BTH */
429                 if (!header_in_data)
430                         reth = &ohdr->u.rc.reth;
431                 else {
432                         reth = (struct ib_reth *)data;
433                         data += sizeof(*reth);
434                 }
435                 hdrsize += sizeof(*reth);
436                 qp->r_len = be32_to_cpu(reth->length);
437                 qp->r_rcv_len = 0;
438                 if (qp->r_len != 0) {
439                         u32 rkey = be32_to_cpu(reth->rkey);
440                         u64 vaddr = be64_to_cpu(reth->vaddr);
441                         int ok;
442
443                         /* Check rkey */
444                         ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
445                                            vaddr, rkey,
446                                            IB_ACCESS_REMOTE_WRITE);
447                         if (unlikely(!ok)) {
448                                 dev->n_pkt_drops++;
449                                 goto done;
450                         }
451                 } else {
452                         qp->r_sge.sg_list = NULL;
453                         qp->r_sge.sge.mr = NULL;
454                         qp->r_sge.sge.vaddr = NULL;
455                         qp->r_sge.sge.length = 0;
456                         qp->r_sge.sge.sge_length = 0;
457                 }
458                 if (unlikely(!(qp->qp_access_flags &
459                                IB_ACCESS_REMOTE_WRITE))) {
460                         dev->n_pkt_drops++;
461                         goto done;
462                 }
463                 if (opcode == OP(RDMA_WRITE_ONLY))
464                         goto rdma_last;
465                 else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
466                         goto rdma_last_imm;
467                 /* FALLTHROUGH */
468         case OP(RDMA_WRITE_MIDDLE):
469                 /* Check for invalid length PMTU or posted rwqe len. */
470                 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
471                         dev->n_pkt_drops++;
472                         goto done;
473                 }
474                 qp->r_rcv_len += pmtu;
475                 if (unlikely(qp->r_rcv_len > qp->r_len)) {
476                         dev->n_pkt_drops++;
477                         goto done;
478                 }
479                 ipath_copy_sge(&qp->r_sge, data, pmtu);
480                 break;
481
482         case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
483         rdma_last_imm:
484                 /* Get the number of bytes the message was padded by. */
485                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
486                 /* Check for invalid length. */
487                 /* XXX LAST len should be >= 1 */
488                 if (unlikely(tlen < (hdrsize + pad + 4))) {
489                         dev->n_pkt_drops++;
490                         goto done;
491                 }
492                 /* Don't count the CRC. */
493                 tlen -= (hdrsize + pad + 4);
494                 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
495                         dev->n_pkt_drops++;
496                         goto done;
497                 }
498                 if (qp->r_reuse_sge)
499                         qp->r_reuse_sge = 0;
500                 else if (!ipath_get_rwqe(qp, 1)) {
501                         dev->n_pkt_drops++;
502                         goto done;
503                 }
504                 if (header_in_data) {
505                         wc.imm_data = *(__be32 *) data;
506                         data += sizeof(__be32);
507                 } else {
508                         /* Immediate data comes after BTH */
509                         wc.imm_data = ohdr->u.imm_data;
510                 }
511                 hdrsize += 4;
512                 wc.wc_flags = IB_WC_WITH_IMM;
513                 wc.byte_len = 0;
514                 goto last_imm;
515
516         case OP(RDMA_WRITE_LAST):
517         rdma_last:
518                 /* Get the number of bytes the message was padded by. */
519                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
520                 /* Check for invalid length. */
521                 /* XXX LAST len should be >= 1 */
522                 if (unlikely(tlen < (hdrsize + pad + 4))) {
523                         dev->n_pkt_drops++;
524                         goto done;
525                 }
526                 /* Don't count the CRC. */
527                 tlen -= (hdrsize + pad + 4);
528                 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
529                         dev->n_pkt_drops++;
530                         goto done;
531                 }
532                 ipath_copy_sge(&qp->r_sge, data, tlen);
533                 break;
534
535         default:
536                 /* Drop packet for unknown opcodes. */
537                 dev->n_pkt_drops++;
538                 goto done;
539         }
540         qp->r_psn++;
541         qp->r_state = opcode;
542 done:
543         return;
544 }