Skip to content

Commit

Permalink
nvme: ensure reset state check ordering
Browse files Browse the repository at this point in the history
[ Upstream commit e6e7f7a ]

A different CPU may be setting the ctrl->state value, so ensure proper
barriers to prevent optimizing to a stale state. Normally it isn't a
problem to observe the wrong state as it is merely advisory to take a
quicker path during initialization and error recovery, but seeing an old
state can report unexpected ENETRESET errors when a reset request was in
fact successful.

Reported-by: Minh Hoang <mh2022@meta.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
keithbusch authored and gregkh committed Jan 20, 2024
1 parent cc5b051 commit 8884a56
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 49 deletions.
42 changes: 22 additions & 20 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
/*
* Only new queue scan work when admin and IO queues are both alive
*/
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}

Expand All @@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
*/
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
if (ctrl->state != NVME_CTRL_RESETTING)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
return -EBUSY;
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
Expand All @@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_ctrl, failfast_work);

if (ctrl->state != NVME_CTRL_CONNECTING)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
return;

set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
Expand Down Expand Up @@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
if (ctrl->state != NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
ret = -ENETRESET;
}

Expand Down Expand Up @@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,

spin_lock_irqsave(&ctrl->lock, flags);

old_state = ctrl->state;
old_state = nvme_ctrl_state(ctrl);
switch (new_state) {
case NVME_CTRL_LIVE:
switch (old_state) {
Expand Down Expand Up @@ -567,19 +567,19 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}

if (changed) {
ctrl->state = new_state;
WRITE_ONCE(ctrl->state, new_state);
wake_up_all(&ctrl->state_wq);
}

spin_unlock_irqrestore(&ctrl->lock, flags);
if (!changed)
return false;

if (ctrl->state == NVME_CTRL_LIVE) {
if (new_state == NVME_CTRL_LIVE) {
if (old_state == NVME_CTRL_CONNECTING)
nvme_stop_failfast_work(ctrl);
nvme_kick_requeue_lists(ctrl);
} else if (ctrl->state == NVME_CTRL_CONNECTING &&
} else if (new_state == NVME_CTRL_CONNECTING &&
old_state == NVME_CTRL_RESETTING) {
nvme_start_failfast_work(ctrl);
}
Expand All @@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
*/
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
{
switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
Expand All @@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
wait_event(ctrl->state_wq,
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
nvme_state_terminal(ctrl));
return ctrl->state == NVME_CTRL_RESETTING;
return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
}
EXPORT_SYMBOL_GPL(nvme_wait_reset);

Expand Down Expand Up @@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

if (state != NVME_CTRL_DELETING_NOIO &&
state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
Expand Down Expand Up @@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
* command, which is require to set the queue live in the
* appropinquate states.
*/
switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_CONNECTING:
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
Expand Down Expand Up @@ -2542,7 +2544,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)

if (ctrl->ps_max_latency_us != latency) {
ctrl->ps_max_latency_us = latency;
if (ctrl->state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
nvme_configure_apst(ctrl);
}
}
Expand Down Expand Up @@ -3228,7 +3230,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);

switch (ctrl->state) {
switch (nvme_ctrl_state(ctrl)) {
case NVME_CTRL_LIVE:
break;
default:
Expand Down Expand Up @@ -3914,7 +3916,7 @@ static void nvme_scan_work(struct work_struct *work)
int ret;

/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
return;

/*
Expand Down Expand Up @@ -3984,7 +3986,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
* removing the namespaces' disks; fail all the queues now to avoid
* potentially having to clean up the failed sync later.
*/
if (ctrl->state == NVME_CTRL_DEAD)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
nvme_mark_namespaces_dead(ctrl);

/* this is a no-op when called from the controller reset handler */
Expand Down Expand Up @@ -4066,7 +4068,7 @@ static void nvme_async_event_work(struct work_struct *work)
* flushing ctrl async_event_work after changing the controller state
* from LIVE and before freeing the admin queue.
*/
if (ctrl->state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
ctrl->ops->submit_async_event(ctrl);
}

Expand Down Expand Up @@ -4451,7 +4453,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
{
int ret;

ctrl->state = NVME_CTRL_NEW;
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
Expand Down
6 changes: 3 additions & 3 deletions drivers/nvme/host/fc.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
/*
Expand Down Expand Up @@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);

switch (ctrl->ctrl.state) {
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
Expand Down Expand Up @@ -3305,7 +3305,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;

if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
return;

if (portptr->port_state == FC_OBJSTATE_ONLINE) {
Expand Down
14 changes: 7 additions & 7 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);

/* If there is a reset/reinit ongoing, we shouldn't reset again. */
switch (dev->ctrl.state) {
switch (nvme_ctrl_state(&dev->ctrl)) {
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
return false;
Expand Down Expand Up @@ -1322,7 +1322,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* cancellation error. All outstanding requests are completed on
* shutdown, so we return BLK_EH_DONE.
*/
switch (dev->ctrl.state) {
switch (nvme_ctrl_state(&dev->ctrl)) {
case NVME_CTRL_CONNECTING:
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
fallthrough;
Expand Down Expand Up @@ -1594,7 +1594,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
/*
* Controller is in wrong state, fail early.
*/
if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) {
mutex_unlock(&dev->shutdown_lock);
return -ENODEV;
}
Expand Down Expand Up @@ -2574,13 +2574,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)

static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl);
struct pci_dev *pdev = to_pci_dev(dev->dev);
bool dead;

mutex_lock(&dev->shutdown_lock);
dead = nvme_pci_ctrl_is_dead(dev);
if (dev->ctrl.state == NVME_CTRL_LIVE ||
dev->ctrl.state == NVME_CTRL_RESETTING) {
if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) {
if (pci_is_enabled(pdev))
nvme_start_freeze(&dev->ctrl);
/*
Expand Down Expand Up @@ -2691,7 +2691,7 @@ static void nvme_reset_work(struct work_struct *work)
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;

if (dev->ctrl.state != NVME_CTRL_RESETTING) {
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) {
dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
dev->ctrl.state);
result = -ENODEV;
Expand Down Expand Up @@ -3207,7 +3207,7 @@ static int nvme_suspend(struct device *dev)
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);

if (ctrl->state != NVME_CTRL_LIVE)
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
goto unfreeze;

/*
Expand Down
23 changes: 14 additions & 9 deletions drivers/nvme/host/rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -984,10 +984,11 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)

static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);

/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
if (state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}

Expand Down Expand Up @@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);

WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
Expand Down Expand Up @@ -1128,8 +1131,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)

if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);

WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}

Expand Down Expand Up @@ -1161,7 +1166,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;

if (ctrl->ctrl.state == NVME_CTRL_LIVE)
if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
dev_info(ctrl->ctrl.device,
"%s for CQE 0x%p failed with status %s (%d)\n",
op, wc->wr_cqe,
Expand Down Expand Up @@ -1944,7 +1949,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));

if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller
Expand Down
27 changes: 17 additions & 10 deletions drivers/nvme/host/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1993,10 +1993,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,

static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
{
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

/* If we are resetting/deleting then do nothing */
if (ctrl->state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
ctrl->state == NVME_CTRL_LIVE);
if (state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
return;
}

Expand Down Expand Up @@ -2056,8 +2057,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
WARN_ON_ONCE(new);
ret = -EINVAL;
goto destroy_io;
Expand Down Expand Up @@ -2124,8 +2127,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)

if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}

Expand Down Expand Up @@ -2155,8 +2160,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work)

if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DELETING_NOIO);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DELETING_NOIO);
return;
}

Expand Down Expand Up @@ -2274,7 +2281,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
opc, nvme_opcode_str(qid, opc, fctype));

if (ctrl->state != NVME_CTRL_LIVE) {
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
/*
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller
Expand Down

0 comments on commit 8884a56

Please sign in to comment.