Skip to content

Commit

Permalink
RDMA/rxe: Convert spin_{lock_bh,unlock_bh} to spin_{lock_irqsave,unlo…
Browse files Browse the repository at this point in the history
…ck_irqrestore}

We need to call spin_lock_irqsave()/spin_unlock_irqrestore() for
state_lock in rxe, otherwsie the callchain:

  ib_post_send_mad
	-> spin_lock_irqsave
	-> ib_post_send -> rxe_post_send
				-> spin_lock_bh
				-> spin_unlock_bh
	-> spin_unlock_irqrestore

Causes below traces during run block nvmeof-mp/001 test due to mismatched
spinlock nesting:

  WARNING: CPU: 0 PID: 94794 at kernel/softirq.c:376 __local_bh_enable_ip+0xc2/0x140
  [ ... ]
  CPU: 0 PID: 94794 Comm: kworker/u4:1 Tainted: G            E      6.4.0-rc1 #9
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
  Workqueue: rdma_cm cma_work_handler [rdma_cm]
  RIP: 0010:__local_bh_enable_ip+0xc2/0x140
  Code: 48 85 c0 74 72 5b 41 5c 5d 31 c0 89 c2 89 c1 89 c6 89 c7 41 89 c0 e9 bd 0e 11 01 65 8b 05 f2 65 72 48 85 c0 0f 85 76 ff ff ff <0f> 0b e9 6f ff ff ff e8 d2 39 1c 00 eb 80 4c 89 e7 e8 68 ad 0a 00
  RSP: 0018:ffffb7cf818539f0 EFLAGS: 00010046
  RAX: 0000000000000000 RBX: 0000000000000201 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: 0000000000000201 RDI: ffffffffc0f25f79
  RBP: ffffb7cf81853a00 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc0f25f79
  R13: ffff8db1f0fa6000 R14: ffff8db2c63ff000 R15: 00000000000000e8
  FS:  0000000000000000(0000) GS:ffff8db33bc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000559758db0f20 CR3: 0000000105124000 CR4: 00000000003506f0
  Call Trace:
   <TASK>
   _raw_spin_unlock_bh+0x31/0x40
   rxe_post_send+0x59/0x8b0 [rdma_rxe]
   ib_send_mad+0x26b/0x470 [ib_core]
   ib_post_send_mad+0x150/0xb40 [ib_core]
   ? cm_form_tid+0x5b/0x90 [ib_cm]
   ib_send_cm_req+0x7c8/0xb70 [ib_cm]
   rdma_connect_locked+0x433/0x940 [rdma_cm]
   nvme_rdma_cm_handler+0x5d7/0x9c0 [nvme_rdma]
   cma_cm_event_handler+0x4f/0x170 [rdma_cm]
   cma_work_handler+0x6a/0xe0 [rdma_cm]
   process_one_work+0x2a9/0x580
   worker_thread+0x52/0x3f0
   ? __pfx_worker_thread+0x10/0x10
   kthread+0x109/0x140
   ? __pfx_kthread+0x10/0x10
   ret_from_fork+0x2c/0x50
   </TASK>


  raw_local_irq_restore() called with IRQs enabled
  WARNING: CPU: 0 PID: 94794 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x37/0x60
  [ ... ]
  CPU: 0 PID: 94794 Comm: kworker/u4:1 Tainted: G        W   E      6.4.0-rc1 #9
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
  Workqueue: rdma_cm cma_work_handler [rdma_cm]
  RIP: 0010:warn_bogus_irq_restore+0x37/0x60
  Code: fb 01 77 36 83 e3 01 74 0e 48 8b 5d f8 c9 31 f6 89 f7 e9 ac ea 01 00 48 c7 c7 e0 52 33 b9 c6 05 bb 1c 69 01 01 e8 39 24 f0 fe <0f> 0b 48 8b 5d f8 c9 31 f6 89 f7 e9 89 ea 01 00 0f b6 f3 48 c7 c7
  RSP: 0018:ffffb7cf81853a58 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: ffffb7cf81853a60 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8db2cfb1a9e8
  R13: ffff8db2cfb1a9d8 R14: ffff8db2c63ff000 R15: 0000000000000000
  FS:  0000000000000000(0000) GS:ffff8db33bc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000559758db0f20 CR3: 0000000105124000 CR4: 00000000003506f0
  Call Trace:
   <TASK>
   _raw_spin_unlock_irqrestore+0x91/0xa0
   ib_send_mad+0x1e3/0x470 [ib_core]
   ib_post_send_mad+0x150/0xb40 [ib_core]
   ? cm_form_tid+0x5b/0x90 [ib_cm]
   ib_send_cm_req+0x7c8/0xb70 [ib_cm]
   rdma_connect_locked+0x433/0x940 [rdma_cm]
   nvme_rdma_cm_handler+0x5d7/0x9c0 [nvme_rdma]
   cma_cm_event_handler+0x4f/0x170 [rdma_cm]
   cma_work_handler+0x6a/0xe0 [rdma_cm]
   process_one_work+0x2a9/0x580
   worker_thread+0x52/0x3f0
   ? __pfx_worker_thread+0x10/0x10
   kthread+0x109/0x140
   ? __pfx_kthread+0x10/0x10
   ret_from_fork+0x2c/0x50
   </TASK>

Fixes: f605f26 ("RDMA/rxe: Protect QP state with qp->state_lock")
Link: https://lore.kernel.org/r/20230510035056.881196-1-guoqing.jiang@linux.dev
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Reviewed-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
  • Loading branch information
Guoqing Jiang authored and jgunthorpe committed May 17, 2023
1 parent 17eabd6 commit b5f3fe2
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 61 deletions.
26 changes: 16 additions & 10 deletions drivers/infiniband/sw/rxe/rxe_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,16 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
unsigned long flags;

rxe_dbg_qp(qp, "retransmit timer fired\n");

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
Expand Down Expand Up @@ -481,11 +482,13 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)

static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
spin_lock_bh(&qp->state_lock);
unsigned long flags;

spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
qp->attr.sq_draining = 0;
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

if (qp->ibqp.event_handler) {
struct ib_event ev;
Expand All @@ -499,7 +502,7 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
return;
}
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

static inline enum comp_state complete_ack(struct rxe_qp *qp,
Expand Down Expand Up @@ -625,13 +628,15 @@ static void free_pkt(struct rxe_pkt_info *pkt)
*/
static void reset_retry_timer(struct rxe_qp *qp)
{
unsigned long flags;

if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) >= IB_QPS_RTS &&
psn_compare(qp->req.psn, qp->comp.psn) > 0)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
}

Expand All @@ -643,18 +648,19 @@ int rxe_completer(struct rxe_qp *qp)
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
int ret;
unsigned long flags;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

drain_resp_pkts(qp);
flush_send_queue(qp, notify);
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
Expand Down
7 changes: 4 additions & 3 deletions drivers/infiniband/sw/rxe/rxe_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,15 +412,16 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
unsigned long flags;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
(!is_request && (qp_state(qp) < IB_QPS_RTR))) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

rxe_icrc_generate(skb, pkt);

Expand Down
36 changes: 23 additions & 13 deletions drivers/infiniband/sw/rxe/rxe_qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
unsigned long flags;

rxe_get(pd);
rxe_get(rcq);
Expand All @@ -325,10 +326,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

return 0;

Expand Down Expand Up @@ -492,24 +493,28 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
spin_lock_bh(&qp->state_lock);
unsigned long flags;

spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_ERR;

/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
spin_lock_bh(&qp->state_lock);
unsigned long flags;

spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

/* caller should hold qp->state_lock */
Expand Down Expand Up @@ -555,14 +560,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.cur_qp_state = attr->qp_state;

if (mask & IB_QP_STATE) {
spin_lock_bh(&qp->state_lock);
unsigned long flags;

spin_lock_irqsave(&qp->state_lock, flags);
err = __qp_chk_state(qp, attr, mask);
if (!err) {
qp->attr.qp_state = attr->qp_state;
rxe_dbg_qp(qp, "state -> %s\n",
qps2str[attr->qp_state]);
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

if (err)
return err;
Expand Down Expand Up @@ -688,6 +695,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
/* called by the query qp verb */
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
{
unsigned long flags;

*attr = qp->attr;

attr->rq_psn = qp->resp.psn;
Expand All @@ -708,12 +717,12 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
/* Applications that get this state typically spin on it.
* Yield the processor
*/
spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->attr.sq_draining) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
} else {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

return 0;
Expand All @@ -737,10 +746,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
unsigned long flags;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
qp->valid = 0;
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;

if (qp_type(qp) == IB_QPT_RC) {
Expand Down
9 changes: 5 additions & 4 deletions drivers/infiniband/sw/rxe/rxe_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
unsigned int pkt_type;
unsigned long flags;

if (unlikely(!qp->valid))
return -EINVAL;
Expand All @@ -38,19 +39,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
} else {
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

return 0;
}
Expand Down
30 changes: 17 additions & 13 deletions drivers/infiniband/sw/rxe/rxe_req.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,18 @@ static void req_retry(struct rxe_qp *qp)
void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
unsigned long flags;

rxe_dbg_qp(qp, "nak timer fired\n");

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_sched_task(&qp->req.task);
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

static void req_check_sq_drain_done(struct rxe_qp *qp)
Expand All @@ -118,8 +119,9 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
unsigned int index;
unsigned int cons;
struct rxe_send_wqe *wqe;
unsigned long flags;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_SQD) {
q = qp->sq.queue;
index = qp->req.wqe_index;
Expand All @@ -140,7 +142,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
break;

qp->attr.sq_draining = 0;
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

if (qp->ibqp.event_handler) {
struct ib_event ev;
Expand All @@ -154,7 +156,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
return;
} while (0);
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
}

static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
Expand All @@ -173,20 +175,21 @@ static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
unsigned long flags;

req_check_sq_drain_done(qp);

wqe = __req_next_wqe(qp);
if (wqe == NULL)
return NULL;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
(wqe->state != wqe_state_processing))) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
return NULL;
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
Expand Down Expand Up @@ -676,16 +679,17 @@ int rxe_requester(struct rxe_qp *qp)
struct rxe_queue *q = qp->sq.queue;
struct rxe_ah *ah;
struct rxe_av *av;
unsigned long flags;

spin_lock_bh(&qp->state_lock);
spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(!qp->valid)) {
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}

if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
if (wqe)
goto err;
else
Expand All @@ -700,10 +704,10 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
spin_unlock_bh(&qp->state_lock);
spin_unlock_irqrestore(&qp->state_lock, flags);

/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
Expand Down
Loading

0 comments on commit b5f3fe2

Please sign in to comment.