@@ -2413,27 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
	nvme_fc_ctrl_put(ctrl);
}

- static void __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl);
+ /*
+  * This routine is used by the transport when it needs to find active
+  * io on a queue that is to be terminated. The transport uses
+  * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
+  * this routine to kill them one at a time.
+  *
+  * As FC allocates an FC exchange for each io, the transport must contact
+  * the LLDD to terminate the exchange, thus releasing the FC exchange.
+  * After terminating the exchange the LLDD will call the transport's
+  * normal io done path for the request, but it will have an aborted
+  * status. The done path will return the io request back to the block
+  * layer with an error status.
+  */
+ static bool
+ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
+ {
+ 	struct nvme_ctrl *nctrl = data;
+ 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
+ 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
+ 
+ 	__nvme_fc_abort_op(ctrl, op);
+ 	return true;
+ }
+
+ /*
+  * This routine runs through all outstanding commands on the association
+  * and aborts them. It is typically called by the delete_association
+  * routine. It is also called due to an error during reconnect; in that
+  * scenario, it is most likely a command that initializes the controller,
+  * including fabric Connect commands on io queues, that may have timed
+  * out or failed, and the io must be killed for the connect thread to
+  * see the error.
+  */
+ static void
+ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
+ {
+ 	/*
+ 	 * If io queues are present, stop them and terminate all outstanding
+ 	 * ios on them. As FC allocates an FC exchange for each io, the
+ 	 * transport must contact the LLDD to terminate the exchange,
+ 	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
+ 	 * to tell us which ios are busy and invoke a transport routine
+ 	 * to kill them with the LLDD. After terminating the exchange
+ 	 * the LLDD will call the transport's normal io done path, but it
+ 	 * will have an aborted status. The done path will return the
+ 	 * io requests back to the block layer as part of normal completions
+ 	 * (but with an error status).
+ 	 */
+ 	if (ctrl->ctrl.queue_count > 1) {
+ 		nvme_stop_queues(&ctrl->ctrl);
+ 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
+ 				nvme_fc_terminate_exchange, &ctrl->ctrl);
+ 		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+ 		if (start_queues)
+ 			nvme_start_queues(&ctrl->ctrl);
+ 	}
+
+ 	/*
+ 	 * Other transports, which don't have link-level contexts bound
+ 	 * to sqe's, would try to gracefully shut down the controller by
+ 	 * writing the registers for shutdown and polling (calling
+ 	 * nvme_shutdown_ctrl()). Given that a bunch of io was potentially
+ 	 * just aborted and we will wait on those contexts, and given
+ 	 * there was no indication of how live the controller is on the
+ 	 * link, don't send more io to create more contexts for the
+ 	 * shutdown. Let the controller fail via keepalive failure if
+ 	 * it's still present.
+ 	 */
+ 
+ 	/*
+ 	 * Clean up the admin queue. Same thing as above.
+ 	 */
+ 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
+ 			nvme_fc_terminate_exchange, &ctrl->ctrl);
+ 	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+ }

static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
	/*
- 	 * if an error (io timeout, etc) while (re)connecting,
- 	 * it's an error on creating the new association.
- 	 * Start the error recovery thread if it hasn't already
- 	 * been started. It is expected there could be multiple
- 	 * ios hitting this path before things are cleaned up.
+ 	 * if an error (io timeout, etc) occurred while (re)connecting, the
+ 	 * remote port requested termination of the association
+ 	 * (disconnect_ls), or an error (timeout or abort) occurred on an
+ 	 * io while creating the controller, abort any ios on the
+ 	 * association and let the create_association error path resolve
+ 	 * things.
	 */
	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
- 		__nvme_fc_terminate_io(ctrl);
- 
- 		/*
- 		 * Rescheduling the connection after recovering
- 		 * from the io error is left to the reconnect work
- 		 * item, which is what should have stalled waiting on
- 		 * the io that had the error that scheduled this work.
- 		 */
+ 		__nvme_fc_abort_outstanding_ios(ctrl, true);
+ 		set_bit(ASSOC_FAILED, &ctrl->flags);
		return;
	}

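Editor's aside: for readers less familiar with the blk-mq pattern the new helpers rely on, the sketch below is a minimal, self-contained userspace analogue, not kernel code; every name in it (fake_tagset, fake_request, lldd_abort_exchange, tagset_busy_iter, and so on) is a hypothetical stand-in. It shows the shape of the mechanism the comments describe: a busy-iterator walks every in-flight request, hands each one to a per-request terminate callback, each aborted request then completes through the normal done path with an error status, and the caller checks that nothing is outstanding before letting submission resume.

/* Minimal userspace analogue of the abort-outstanding-ios pattern.
 * Hypothetical names throughout; this is not the kernel API. */
#include <stdbool.h>
#include <stdio.h>

enum req_state { REQ_FREE, REQ_BUSY };

struct fake_request {
	int tag;
	enum req_state state;
};

#define NR_TAGS 4

/* Stand-in for a blk-mq tag set: a fixed table of requests. */
static struct fake_request fake_tagset[NR_TAGS] = {
	{ 0, REQ_BUSY }, { 1, REQ_FREE }, { 2, REQ_BUSY }, { 3, REQ_BUSY },
};

/* The "LLDD" terminates the exchange; completion then flows through the
 * normal done path, but with an aborted/error status. */
static void lldd_abort_exchange(struct fake_request *req)
{
	printf("req %d: exchange terminated, completed with error status\n",
	       req->tag);
	req->state = REQ_FREE;		/* returned to the "block layer" */
}

/* Per-request callback, analogous to nvme_fc_terminate_exchange(). */
static bool terminate_exchange(struct fake_request *req, void *data)
{
	(void)data;
	lldd_abort_exchange(req);
	return true;			/* keep iterating */
}

/* Analogous to blk_mq_tagset_busy_iter(): invoke the callback for
 * every request that is currently in flight. */
static void tagset_busy_iter(bool (*fn)(struct fake_request *, void *),
			     void *data)
{
	for (int i = 0; i < NR_TAGS; i++)
		if (fake_tagset[i].state == REQ_BUSY)
			fn(&fake_tagset[i], data);
}

/* Analogous to blk_mq_tagset_wait_completed_request(). */
static bool all_requests_completed(void)
{
	for (int i = 0; i < NR_TAGS; i++)
		if (fake_tagset[i].state != REQ_FREE)
			return false;
	return true;
}

int main(void)
{
	/* stop queues (not modelled), abort everything in flight ... */
	tagset_busy_iter(terminate_exchange, NULL);
	/* ... wait for the completions, then optionally restart queues */
	printf("all completed: %s\n", all_requests_completed() ? "yes" : "no");
	return 0;
}

The kernel version differs mainly in that the aborted completions arrive asynchronously from the LLDD, which is why blk_mq_tagset_wait_completed_request() must run before the queues are restarted.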
@@ -2747,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq)
	nvme_fc_ctrl_put(ctrl);
}

- /*
-  * This routine is used by the transport when it needs to find active
-  * io on a queue that is to be terminated. The transport uses
-  * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
-  * this routine to kill them one at a time.
-  *
-  * As FC allocates an FC exchange for each io, the transport must contact
-  * the LLDD to terminate the exchange, thus releasing the FC exchange.
-  * After terminating the exchange the LLDD will call the transport's
-  * normal io done path for the request, but it will have an aborted
-  * status. The done path will return the io request back to the block
-  * layer with an error status.
-  */
- static bool
- nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
- {
- 	struct nvme_ctrl *nctrl = data;
- 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
- 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- 
- 	__nvme_fc_abort_op(ctrl, op);
- 	return true;
- }
-

static const struct blk_mq_ops nvme_fc_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
@@ -3111,60 +3157,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}

- /*
-  * This routine runs through all outstanding commands on the association
-  * and aborts them. It is typically called by the delete_association
-  * routine. It is also called due to an error during reconnect; in that
-  * scenario, it is most likely a command that initializes the controller,
-  * including fabric Connect commands on io queues, that may have timed
-  * out or failed, and the io must be killed for the connect thread to
-  * see the error.
-  */
- static void
- __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
- {
- 	/*
- 	 * If io queues are present, stop them and terminate all outstanding
- 	 * ios on them. As FC allocates an FC exchange for each io, the
- 	 * transport must contact the LLDD to terminate the exchange,
- 	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
- 	 * to tell us which ios are busy and invoke a transport routine
- 	 * to kill them with the LLDD. After terminating the exchange
- 	 * the LLDD will call the transport's normal io done path, but it
- 	 * will have an aborted status. The done path will return the
- 	 * io requests back to the block layer as part of normal completions
- 	 * (but with an error status).
- 	 */
- 	if (ctrl->ctrl.queue_count > 1) {
- 		nvme_stop_queues(&ctrl->ctrl);
- 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
- 				nvme_fc_terminate_exchange, &ctrl->ctrl);
- 		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
- 		if (start_queues)
- 			nvme_start_queues(&ctrl->ctrl);
- 	}
- 
- 	/*
- 	 * Other transports, which don't have link-level contexts bound
- 	 * to sqe's, would try to gracefully shut down the controller by
- 	 * writing the registers for shutdown and polling (calling
- 	 * nvme_shutdown_ctrl()). Given that a bunch of io was potentially
- 	 * just aborted and we will wait on those contexts, and given
- 	 * there was no indication of how live the controller is on the
- 	 * link, don't send more io to create more contexts for the
- 	 * shutdown. Let the controller fail via keepalive failure if
- 	 * it's still present.
- 	 */
- 
- 	/*
- 	 * Clean up the admin queue. Same thing as above.
- 	 */
- 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
- 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
- 			nvme_fc_terminate_exchange, &ctrl->ctrl);
- 	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
- }
-
/*
 * This routine stops operation of the controller on the host side.
 * On the host os stack side: Admin and IO queues are stopped,
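Editor's aside: the comment moved above contrasts FC's abort path with the graceful shutdown other transports perform via nvme_shutdown_ctrl(), i.e. writing a shutdown notification into the controller configuration register and polling controller status until the device reports the shutdown complete. As a rough illustration of that register handshake only, here is a minimal, self-contained sketch: the fake_bar register file and the mmio helpers are hypothetical stand-ins, and the field encodings follow the NVMe specification (CC.SHN in bits 15:14, CSTS.SHST in bits 3:2). The FC code deliberately skips this step because it would push more io onto a link of unknown health.

/* Hedged sketch of the register-level shutdown handshake; not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NVME_REG_CC		0x14		/* Controller Configuration */
#define NVME_REG_CSTS		0x1c		/* Controller Status */
#define NVME_CC_SHN_NORMAL	(1u << 14)	/* request normal shutdown */
#define NVME_CSTS_SHST_MASK	(3u << 2)
#define NVME_CSTS_SHST_CMPLT	(2u << 2)	/* shutdown complete */

/* Fake BAR so the sketch is self-contained and runnable. */
static uint32_t fake_bar[0x20 / 4];

static uint32_t mmio_read32(uint32_t off)
{
	return fake_bar[off / 4];
}

static void mmio_write32(uint32_t off, uint32_t val)
{
	fake_bar[off / 4] = val;
	/* Pretend the controller finishes the shutdown immediately. */
	if (off == NVME_REG_CC && (val & NVME_CC_SHN_NORMAL))
		fake_bar[NVME_REG_CSTS / 4] |= NVME_CSTS_SHST_CMPLT;
}

/* Ask the controller to shut down, then poll until it reports completion.
 * Returns false if it never answers (e.g. the link is dead). */
static bool shutdown_ctrl(int max_polls)
{
	mmio_write32(NVME_REG_CC,
		     mmio_read32(NVME_REG_CC) | NVME_CC_SHN_NORMAL);

	while (max_polls-- > 0) {
		uint32_t csts = mmio_read32(NVME_REG_CSTS);

		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
			return true;
		/* Real code would sleep between polls and honour a timeout. */
	}
	return false;
}

int main(void)
{
	printf("shutdown %s\n", shutdown_ctrl(100) ? "complete" : "timed out");
	return 0;
}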
@@ -3297,17 +3289,6 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
- 	/*
- 	 * if state is CONNECTING - the error occurred as part of a
- 	 * reconnect attempt. Abort any ios on the association and
- 	 * let the create_association error paths resolve things.
- 	 */
- 	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
- 		__nvme_fc_abort_outstanding_ios(ctrl, true);
- 		set_bit(ASSOC_FAILED, &ctrl->flags);
- 		return;
- 	}
-
	/*
	 * For any other state, kill the association. As this routine
	 * is a common io abort routine for resetting and such, after