
Commit 81091d7

mustafakismail authored and jgunthorpe committed
RDMA/irdma: Add SW mechanism to generate completions on error
HW flushes after the QP moves to the error state are not reliable, which can leave an application hung waiting on completions for outstanding WRs. Implement a SW mechanism to generate completions for any outstanding WRs after the QP is modified to error.

This is accomplished by starting a delayed worker after the QP is modified to error and the HW flush is performed. The worker generates completions that are returned to the application when it polls the CQ. This mechanism applies only to kernel applications.

Link: https://lore.kernel.org/r/20220425181624.1617-1-shiraz.saleem@intel.com
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent a2d36b0 commit 81091d7
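The mechanism described above is the standard delayed_work lifecycle: initialize the work at QP create time, queue it once the QP is modified to error, and cancel it synchronously at destroy. A minimal sketch of that pattern follows; all my_*-prefixed names and the 500 ms delay are illustrative placeholders, not identifiers from the driver (irdma's own constant is IRDMA_FLUSH_DELAY_MS).

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define MY_FLUSH_DELAY_MS 500	/* placeholder; irdma defines IRDMA_FLUSH_DELAY_MS */

struct my_qp {
	struct delayed_work dwork_flush;
	struct workqueue_struct *cleanup_wq;
};

static void my_generate_flush_completions(struct my_qp *qp)
{
	/* walk the SW rings and synthesize flushed CQEs (see utils.c below) */
}

static void my_flush_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct my_qp *qp = container_of(dwork, struct my_qp, dwork_flush);

	my_generate_flush_completions(qp);
}

static void my_qp_create(struct my_qp *qp)
{
	/* kernel QPs only; user-mode QPs are flushed from userspace */
	INIT_DELAYED_WORK(&qp->dwork_flush, my_flush_worker);
}

static void my_qp_modified_to_error(struct my_qp *qp)
{
	/* the HW flush was just issued; arm the SW fallback */
	queue_delayed_work(qp->cleanup_wq, &qp->dwork_flush,
			   msecs_to_jiffies(MY_FLUSH_DELAY_MS));
}

static void my_qp_destroy(struct my_qp *qp)
{
	/* make sure the worker cannot run against a freed QP */
	cancel_delayed_work_sync(&qp->dwork_flush);
}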

File tree

4 files changed (+210, -37 lines)


drivers/infiniband/hw/irdma/hw.c

Lines changed: 18 additions & 13 deletions
@@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
 	struct irdma_cq *cq = iwcq->back_cq;
 
 	if (!cq->user_mode)
-		cq->armed = false;
+		atomic_set(&cq->armed, 0);
 	if (cq->ibcq.comp_handler)
 		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 }
@@ -2689,24 +2689,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
 	info.sq = flush_mask & IRDMA_FLUSH_SQ;
 	info.rq = flush_mask & IRDMA_FLUSH_RQ;
 
-	if (flush_mask & IRDMA_REFLUSH) {
-		if (info.sq)
-			iwqp->sc_qp.flush_sq = false;
-		if (info.rq)
-			iwqp->sc_qp.flush_rq = false;
-	}
-
 	/* Generate userflush errors in CQE */
 	info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
 	info.sq_minor_code = FLUSH_GENERAL_ERR;
 	info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
 	info.rq_minor_code = FLUSH_GENERAL_ERR;
 	info.userflushcode = true;
-	if (flush_code) {
-		if (info.sq && iwqp->sc_qp.sq_flush_code)
-			info.sq_minor_code = flush_code;
-		if (info.rq && iwqp->sc_qp.rq_flush_code)
-			info.rq_minor_code = flush_code;
+
+	if (flush_mask & IRDMA_REFLUSH) {
+		if (info.sq)
+			iwqp->sc_qp.flush_sq = false;
+		if (info.rq)
+			iwqp->sc_qp.flush_rq = false;
+	} else {
+		if (flush_code) {
+			if (info.sq && iwqp->sc_qp.sq_flush_code)
+				info.sq_minor_code = flush_code;
+			if (info.rq && iwqp->sc_qp.rq_flush_code)
+				info.rq_minor_code = flush_code;
+		}
+		if (!iwqp->user_mode)
+			queue_delayed_work(iwqp->iwdev->cleanup_wq,
+					   &iwqp->dwork_flush,
+					   msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
 	}
 
 	/* Issue flush */
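Note the choice of workqueue call here: this path arms the worker with queue_delayed_work(), which leaves an already-pending timer untouched, whereas the post_send()/post_recv() paths in verbs.c below use mod_delayed_work() to restart the countdown whenever WRs arrive after the flush. A short sketch of the difference, with illustrative names:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Arm once: if @dw is already pending, queue_delayed_work() returns
 * false and the existing timer keeps its original expiry. */
static void my_arm_once(struct workqueue_struct *wq, struct delayed_work *dw,
			unsigned int delay_ms)
{
	queue_delayed_work(wq, dw, msecs_to_jiffies(delay_ms));
}

/* Arm or extend: mod_delayed_work() (re)sets the timer even when @dw is
 * already pending, pushing the expiry forward. */
static void my_arm_or_extend(struct workqueue_struct *wq, struct delayed_work *dw,
			     unsigned int delay_ms)
{
	mod_delayed_work(wq, dw, msecs_to_jiffies(delay_ms));
}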

drivers/infiniband/hw/irdma/utils.c

Lines changed: 147 additions & 0 deletions
@@ -2498,3 +2498,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
 
 	return polarity != ukcq->polarity;
 }
+
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
+{
+	struct irdma_cmpl_gen *cmpl_node;
+	struct list_head *tmp_node, *list_node;
+
+	list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
+		cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
+		list_del(&cmpl_node->list);
+		kfree(cmpl_node);
+	}
+}
+
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
+{
+	struct irdma_cmpl_gen *cmpl;
+
+	if (list_empty(&iwcq->cmpl_generated))
+		return -ENOENT;
+	cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
+	list_del(&cmpl->list);
+	memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
+	kfree(cmpl);
+
+	ibdev_dbg(iwcq->ibcq.device,
+		  "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
+		  __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
+		  cq_poll_info->wr_id);
+
+	return 0;
+}
+
+/**
+ * irdma_set_cpi_common_values - fill in values for polling info struct
+ * @cpi: resulting structure of cq_poll_info type
+ * @qp: QPair
+ * @qp_num: id of the QP
+ */
+static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
+					struct irdma_qp_uk *qp, u32 qp_num)
+{
+	cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+	cpi->error = true;
+	cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
+	cpi->minor_err = FLUSH_GENERAL_ERR;
+	cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
+	cpi->qp_id = qp_num;
+}
+
+static inline void irdma_comp_handler(struct irdma_cq *cq)
+{
+	if (!cq->ibcq.comp_handler)
+		return;
+	if (atomic_cmpxchg(&cq->armed, 1, 0))
+		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void irdma_generate_flush_completions(struct irdma_qp *iwqp)
+{
+	struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
+	struct irdma_ring *sq_ring = &qp->sq_ring;
+	struct irdma_ring *rq_ring = &qp->rq_ring;
+	struct irdma_cmpl_gen *cmpl;
+	__le64 *sw_wqe;
+	u64 wqe_qword;
+	u32 wqe_idx;
+	bool compl_generated = false;
+	unsigned long flags1;
+
+	spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
+	if (irdma_cq_empty(iwqp->iwscq)) {
+		unsigned long flags2;
+
+		spin_lock_irqsave(&iwqp->lock, flags2);
+		while (IRDMA_RING_MORE_WORK(*sq_ring)) {
+			cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+			if (!cmpl) {
+				spin_unlock_irqrestore(&iwqp->lock, flags2);
+				spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+				return;
+			}
+
+			wqe_idx = sq_ring->tail;
+			irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+			cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+			sw_wqe = qp->sq_base[wqe_idx].elem;
+			get_64bit_val(sw_wqe, 24, &wqe_qword);
+			cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
+			/* remove the SQ WR by moving SQ tail*/
+			IRDMA_RING_SET_TAIL(*sq_ring,
+					    sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
+
+			ibdev_dbg(iwqp->iwscq->ibcq.device,
+				  "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
+				  __func__, cmpl->cpi.wr_id, qp->qp_id);
+			list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
+			compl_generated = true;
+		}
+		spin_unlock_irqrestore(&iwqp->lock, flags2);
+		spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+		if (compl_generated)
+			irdma_comp_handler(iwqp->iwrcq);
+	} else {
+		spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
+		mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+				 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+	}
+
+	spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
+	if (irdma_cq_empty(iwqp->iwrcq)) {
+		unsigned long flags2;
+
+		spin_lock_irqsave(&iwqp->lock, flags2);
+		while (IRDMA_RING_MORE_WORK(*rq_ring)) {
+			cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+			if (!cmpl) {
+				spin_unlock_irqrestore(&iwqp->lock, flags2);
+				spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+				return;
+			}
+
+			wqe_idx = rq_ring->tail;
+			irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+			cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
+			cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+			/* remove the RQ WR by moving RQ tail */
+			IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
+			ibdev_dbg(iwqp->iwrcq->ibcq.device,
+				  "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
+				  __func__, cmpl->cpi.wr_id, qp->qp_id,
+				  wqe_idx);
+			list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
+
+			compl_generated = true;
+		}
+		spin_unlock_irqrestore(&iwqp->lock, flags2);
+		spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+		if (compl_generated)
+			irdma_comp_handler(iwqp->iwrcq);
+	} else {
+		spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
+		mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+				 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+	}
+}
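irdma_comp_handler() above is one half of the new atomic armed handshake; the other half is the atomic_cmpxchg() in irdma_req_notify_cq() (verbs.c below). Arming swaps 0 -> 1, and only the first completion event after arming wins the 1 -> 0 swap and makes the upcall, so racing event sources cannot double-notify. A minimal sketch of this one-shot pattern, with hypothetical my_* names:

#include <linux/atomic.h>

struct my_cq {
	atomic_t armed;				/* 1 = one notification requested */
	void (*comp_handler)(struct my_cq *cq);
};

/* consumer side: request exactly one completion notification */
static void my_req_notify(struct my_cq *cq)
{
	atomic_cmpxchg(&cq->armed, 0, 1);	/* old value 0 means we armed it */
}

/* producer side: at most one upcall per arming, even if called concurrently */
static void my_comp_event(struct my_cq *cq)
{
	if (!cq->comp_handler)
		return;
	if (atomic_cmpxchg(&cq->armed, 1, 0))	/* only the 1 -> 0 winner fires */
		cq->comp_handler(cq);
}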

drivers/infiniband/hw/irdma/verbs.c

Lines changed: 33 additions & 23 deletions
@@ -535,6 +535,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
 		irdma_modify_qp_to_err(&iwqp->sc_qp);
 
+	if (!iwqp->user_mode)
+		cancel_delayed_work_sync(&iwqp->dwork_flush);
+
 	irdma_qp_rem_ref(&iwqp->ibqp);
 	wait_for_completion(&iwqp->free_qp);
 	irdma_free_lsmm_rsrc(iwqp);
@@ -790,6 +793,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
 	return 0;
 }
 
+static void irdma_flush_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
+
+	irdma_generate_flush_completions(iwqp);
+}
+
 /**
  * irdma_create_qp - create qp
  * @ibqp: ptr of qp
@@ -909,6 +920,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
 		init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
 		irdma_setup_virt_qp(iwdev, iwqp, &init_info);
 	} else {
+		INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
 		init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
 		err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
 	}
@@ -1400,11 +1412,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	}
 	if (iwqp->ibqp_state > IB_QPS_RTS &&
 	    !iwqp->flush_issued) {
-		iwqp->flush_issued = 1;
 		spin_unlock_irqrestore(&iwqp->lock, flags);
 		irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
 				       IRDMA_FLUSH_RQ |
 				       IRDMA_FLUSH_WAIT);
+		iwqp->flush_issued = 1;
 	} else {
 		spin_unlock_irqrestore(&iwqp->lock, flags);
 	}
@@ -1757,6 +1769,8 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 	unsigned long flags;
 
 	spin_lock_irqsave(&iwcq->lock, flags);
+	if (!list_empty(&iwcq->cmpl_generated))
+		irdma_remove_cmpls_list(iwcq);
 	if (!list_empty(&iwcq->resize_list))
 		irdma_process_resize_list(iwcq, iwdev, NULL);
 	spin_unlock_irqrestore(&iwcq->lock, flags);
@@ -1961,6 +1975,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 	cq->back_cq = iwcq;
 	spin_lock_init(&iwcq->lock);
 	INIT_LIST_HEAD(&iwcq->resize_list);
+	INIT_LIST_HEAD(&iwcq->cmpl_generated);
 	info.dev = dev;
 	ukinfo->cq_size = max(entries, 4);
 	ukinfo->cq_id = cq_num;
@@ -3046,15 +3061,12 @@ static int irdma_post_send(struct ib_qp *ibqp,
 	unsigned long flags;
 	bool inv_stag;
 	struct irdma_ah *ah;
-	bool reflush = false;
 
 	iwqp = to_iwqp(ibqp);
 	ukqp = &iwqp->sc_qp.qp_uk;
 	dev = &iwqp->iwdev->rf->sc_dev;
 
 	spin_lock_irqsave(&iwqp->lock, flags);
-	if (iwqp->flush_issued && ukqp->sq_flush_complete)
-		reflush = true;
 	while (ib_wr) {
 		memset(&info, 0, sizeof(info));
 		inv_stag = false;
@@ -3204,15 +3216,14 @@ static int irdma_post_send(struct ib_qp *ibqp,
 		ib_wr = ib_wr->next;
 	}
 
-	if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
-		irdma_uk_qp_post_wr(ukqp);
+	if (!iwqp->flush_issued) {
+		if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
+			irdma_uk_qp_post_wr(ukqp);
 		spin_unlock_irqrestore(&iwqp->lock, flags);
-	} else if (reflush) {
-		ukqp->sq_flush_complete = false;
-		spin_unlock_irqrestore(&iwqp->lock, flags);
-		irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
 	} else {
 		spin_unlock_irqrestore(&iwqp->lock, flags);
+		mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+				 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
 	}
 	if (err)
 		*bad_wr = ib_wr;
@@ -3235,14 +3246,11 @@ static int irdma_post_recv(struct ib_qp *ibqp,
 	struct irdma_post_rq_info post_recv = {};
 	unsigned long flags;
 	int err = 0;
-	bool reflush = false;
 
 	iwqp = to_iwqp(ibqp);
 	ukqp = &iwqp->sc_qp.qp_uk;
 
 	spin_lock_irqsave(&iwqp->lock, flags);
-	if (iwqp->flush_issued && ukqp->rq_flush_complete)
-		reflush = true;
 	while (ib_wr) {
 		post_recv.num_sges = ib_wr->num_sge;
 		post_recv.wr_id = ib_wr->wr_id;
@@ -3258,13 +3266,10 @@ static int irdma_post_recv(struct ib_qp *ibqp,
 	}
 
 out:
-	if (reflush) {
-		ukqp->rq_flush_complete = false;
-		spin_unlock_irqrestore(&iwqp->lock, flags);
-		irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
-	} else {
-		spin_unlock_irqrestore(&iwqp->lock, flags);
-	}
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (iwqp->flush_issued)
+		mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+				 msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
 
 	if (err)
 		*bad_wr = ib_wr;
@@ -3476,6 +3481,11 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
 	/* check the current CQ for new cqes */
 	while (npolled < num_entries) {
 		ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
+		if (ret == -ENOENT) {
+			ret = irdma_generated_cmpls(iwcq, cur_cqe);
+			if (!ret)
+				irdma_process_cqe(entry + npolled, cur_cqe);
+		}
 		if (!ret) {
 			++npolled;
 			cq_new_cqe = true;
@@ -3557,13 +3567,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
 	if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
 		promo_event = true;
 
-	if (!iwcq->armed || promo_event) {
-		iwcq->armed = true;
+	if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
 		iwcq->last_notify = cq_notify;
 		irdma_uk_cq_request_notification(ukcq, cq_notify);
 	}
 
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
+	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+	    (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
 		ret = 1;
 	spin_unlock_irqrestore(&iwcq->lock, flags);
 
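From a kernel consumer's point of view the verbs contract is unchanged: once the QP is in error, ib_poll_cq() eventually returns every outstanding WR with IB_WC_WR_FLUSH_ERR status, whether the CQE came from the HW flush or from this SW fallback. A hedged sketch of the drain loop a ULP might run (illustrative, not part of the patch):

#include <rdma/ib_verbs.h>

/* Poll @cq until it is empty, logging WRs completed in error. */
static void my_drain_flushed_wrs(struct ib_cq *cq)
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status == IB_WC_WR_FLUSH_ERR)
			pr_debug("wr_id 0x%llx flushed\n", wc.wr_id);
	}
}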
