Skip to content

Commit 95ced8a

Browse files
James Smart authored and Christoph Hellwig committed
nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery
nvme_fc_error_recovery() special cases handling when in CONNECTING state and calls __nvme_fc_terminate_io(). __nvme_fc_terminate_io() itself special cases CONNECTING state and calls the routine to abort outstanding ios. Simplify the sequence by putting the call to abort outstanding I/Os directly in nvme_fc_error_recovery. Move the location of __nvme_fc_abort_outstanding_ios(), and nvme_fc_terminate_exchange() which is called by it, to avoid adding function prototypes for nvme_fc_error_recovery(). Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
1 parent 9c2bb25 commit 95ced8a

File tree

1 file changed

+84
-103
lines changed
  • drivers/nvme/host

1 file changed

+84
-103
lines changed

drivers/nvme/host/fc.c

Lines changed: 84 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -2413,27 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
24132413
nvme_fc_ctrl_put(ctrl);
24142414
}
24152415

2416-
static void __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl);
2416+
/*
2417+
* This routine is used by the transport when it needs to find active
2418+
* io on a queue that is to be terminated. The transport uses
2419+
* blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
2420+
* this routine to kill them on a 1 by 1 basis.
2421+
*
2422+
* As FC allocates FC exchange for each io, the transport must contact
2423+
* the LLDD to terminate the exchange, thus releasing the FC exchange.
2424+
* After terminating the exchange the LLDD will call the transport's
2425+
* normal io done path for the request, but it will have an aborted
2426+
* status. The done path will return the io request back to the block
2427+
* layer with an error status.
2428+
*/
2429+
static bool
2430+
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2431+
{
2432+
struct nvme_ctrl *nctrl = data;
2433+
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2434+
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2435+
2436+
__nvme_fc_abort_op(ctrl, op);
2437+
return true;
2438+
}
2439+
2440+
/*
2441+
* This routine runs through all outstanding commands on the association
2442+
* and aborts them. This routine is typically be called by the
2443+
* delete_association routine. It is also called due to an error during
2444+
* reconnect. In that scenario, it is most likely a command that initializes
2445+
* the controller, including fabric Connect commands on io queues, that
2446+
* may have timed out or failed thus the io must be killed for the connect
2447+
* thread to see the error.
2448+
*/
2449+
static void
2450+
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
2451+
{
2452+
/*
2453+
* If io queues are present, stop them and terminate all outstanding
2454+
* ios on them. As FC allocates FC exchange for each io, the
2455+
* transport must contact the LLDD to terminate the exchange,
2456+
* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
2457+
* to tell us what io's are busy and invoke a transport routine
2458+
* to kill them with the LLDD. After terminating the exchange
2459+
* the LLDD will call the transport's normal io done path, but it
2460+
* will have an aborted status. The done path will return the
2461+
* io requests back to the block layer as part of normal completions
2462+
* (but with error status).
2463+
*/
2464+
if (ctrl->ctrl.queue_count > 1) {
2465+
nvme_stop_queues(&ctrl->ctrl);
2466+
blk_mq_tagset_busy_iter(&ctrl->tag_set,
2467+
nvme_fc_terminate_exchange, &ctrl->ctrl);
2468+
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
2469+
if (start_queues)
2470+
nvme_start_queues(&ctrl->ctrl);
2471+
}
2472+
2473+
/*
2474+
* Other transports, which don't have link-level contexts bound
2475+
* to sqe's, would try to gracefully shutdown the controller by
2476+
* writing the registers for shutdown and polling (call
2477+
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2478+
* just aborted and we will wait on those contexts, and given
2479+
* there was no indication of how live the controlelr is on the
2480+
* link, don't send more io to create more contexts for the
2481+
* shutdown. Let the controller fail via keepalive failure if
2482+
* its still present.
2483+
*/
2484+
2485+
/*
2486+
* clean up the admin queue. Same thing as above.
2487+
*/
2488+
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2489+
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2490+
nvme_fc_terminate_exchange, &ctrl->ctrl);
2491+
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
2492+
}
24172493

24182494
static void
24192495
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
24202496
{
24212497
/*
2422-
* if an error (io timeout, etc) while (re)connecting,
2423-
* it's an error on creating the new association.
2424-
* Start the error recovery thread if it hasn't already
2425-
* been started. It is expected there could be multiple
2426-
* ios hitting this path before things are cleaned up.
2498+
* if an error (io timeout, etc) while (re)connecting, the remote
2499+
* port requested terminating of the association (disconnect_ls)
2500+
* or an error (timeout or abort) occurred on an io while creating
2501+
* the controller. Abort any ios on the association and let the
2502+
* create_association error path resolve things.
24272503
*/
24282504
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
2429-
__nvme_fc_terminate_io(ctrl);
2430-
2431-
/*
2432-
* Rescheduling the connection after recovering
2433-
* from the io error is left to the reconnect work
2434-
* item, which is what should have stalled waiting on
2435-
* the io that had the error that scheduled this work.
2436-
*/
2505+
__nvme_fc_abort_outstanding_ios(ctrl, true);
2506+
set_bit(ASSOC_FAILED, &ctrl->flags);
24372507
return;
24382508
}
24392509

@@ -2747,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq)
27472817
nvme_fc_ctrl_put(ctrl);
27482818
}
27492819

2750-
/*
2751-
* This routine is used by the transport when it needs to find active
2752-
* io on a queue that is to be terminated. The transport uses
2753-
* blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
2754-
* this routine to kill them on a 1 by 1 basis.
2755-
*
2756-
* As FC allocates FC exchange for each io, the transport must contact
2757-
* the LLDD to terminate the exchange, thus releasing the FC exchange.
2758-
* After terminating the exchange the LLDD will call the transport's
2759-
* normal io done path for the request, but it will have an aborted
2760-
* status. The done path will return the io request back to the block
2761-
* layer with an error status.
2762-
*/
2763-
static bool
2764-
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2765-
{
2766-
struct nvme_ctrl *nctrl = data;
2767-
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2768-
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2769-
2770-
__nvme_fc_abort_op(ctrl, op);
2771-
return true;
2772-
}
2773-
27742820

27752821
static const struct blk_mq_ops nvme_fc_mq_ops = {
27762822
.queue_rq = nvme_fc_queue_rq,
@@ -3111,60 +3157,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
31113157
}
31123158

31133159

3114-
/*
3115-
* This routine runs through all outstanding commands on the association
3116-
* and aborts them. This routine is typically be called by the
3117-
* delete_association routine. It is also called due to an error during
3118-
* reconnect. In that scenario, it is most likely a command that initializes
3119-
* the controller, including fabric Connect commands on io queues, that
3120-
* may have timed out or failed thus the io must be killed for the connect
3121-
* thread to see the error.
3122-
*/
3123-
static void
3124-
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
3125-
{
3126-
/*
3127-
* If io queues are present, stop them and terminate all outstanding
3128-
* ios on them. As FC allocates FC exchange for each io, the
3129-
* transport must contact the LLDD to terminate the exchange,
3130-
* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
3131-
* to tell us what io's are busy and invoke a transport routine
3132-
* to kill them with the LLDD. After terminating the exchange
3133-
* the LLDD will call the transport's normal io done path, but it
3134-
* will have an aborted status. The done path will return the
3135-
* io requests back to the block layer as part of normal completions
3136-
* (but with error status).
3137-
*/
3138-
if (ctrl->ctrl.queue_count > 1) {
3139-
nvme_stop_queues(&ctrl->ctrl);
3140-
blk_mq_tagset_busy_iter(&ctrl->tag_set,
3141-
nvme_fc_terminate_exchange, &ctrl->ctrl);
3142-
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
3143-
if (start_queues)
3144-
nvme_start_queues(&ctrl->ctrl);
3145-
}
3146-
3147-
/*
3148-
* Other transports, which don't have link-level contexts bound
3149-
* to sqe's, would try to gracefully shutdown the controller by
3150-
* writing the registers for shutdown and polling (call
3151-
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
3152-
* just aborted and we will wait on those contexts, and given
3153-
* there was no indication of how live the controlelr is on the
3154-
* link, don't send more io to create more contexts for the
3155-
* shutdown. Let the controller fail via keepalive failure if
3156-
* its still present.
3157-
*/
3158-
3159-
/*
3160-
* clean up the admin queue. Same thing as above.
3161-
*/
3162-
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
3163-
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
3164-
nvme_fc_terminate_exchange, &ctrl->ctrl);
3165-
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
3166-
}
3167-
31683160
/*
31693161
* This routine stops operation of the controller on the host side.
31703162
* On the host os stack side: Admin and IO queues are stopped,
@@ -3297,17 +3289,6 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
32973289
static void
32983290
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
32993291
{
3300-
/*
3301-
* if state is CONNECTING - the error occurred as part of a
3302-
* reconnect attempt. Abort any ios on the association and
3303-
* let the create_association error paths resolve things.
3304-
*/
3305-
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
3306-
__nvme_fc_abort_outstanding_ios(ctrl, true);
3307-
set_bit(ASSOC_FAILED, &ctrl->flags);
3308-
return;
3309-
}
3310-
33113292
/*
33123293
* For any other state, kill the association. As this routine
33133294
* is a common io abort routine for resetting and such, after

0 commit comments

Comments
 (0)