@@ -72,6 +72,19 @@ static const unsigned int kyber_batch_size[] = {
 	[KYBER_OTHER] = 8,
 };
 
+/*
+ * The mapping between kcq & khd mirrors the one between ctx & hctx;
+ * we use request->mq_ctx->index_hw to index the kcq in khd.
+ */
+struct kyber_ctx_queue {
+	/*
+	 * Used to ensure that operations on rq_list and kcq_map are atomic.
+	 * It also protects the requests on rq_list during merging.
+	 */
+	spinlock_t lock;
+	struct list_head rq_list[KYBER_NUM_DOMAINS];
+} ____cacheline_aligned_in_smp;
+
 struct kyber_queue_data {
 	struct request_queue *q;
 
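For readers who want the shape of the new data structure outside of kernel context, the sketch below is a minimal, self-contained userspace model of what this hunk introduces: one small staging queue per software context, indexed the way khd->kcqs is indexed by ctx->index_hw, plus a per-domain flag array standing in for the kcq_map sbitmap. All names here (ctx_queue, hctx_model, NR_DOMAINS, nonempty) are invented for illustration; this is a model, not the kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

#define NR_DOMAINS 3                      /* read / sync write / other */

struct rq_node {                          /* stand-in for a staged request */
	struct rq_node *next;
};

struct ctx_queue {                        /* mirrors struct kyber_ctx_queue */
	pthread_mutex_t lock;             /* protects head[] and the non-empty flag */
	struct rq_node *head[NR_DOMAINS]; /* staged requests, split by domain */
};

struct hctx_model {                       /* mirrors the kcq part of kyber_hctx_data */
	struct ctx_queue *kcqs;           /* one entry per software context (index_hw) */
	bool *nonempty[NR_DOMAINS];       /* "bitmap": kcqs[i] has work in domain d */
	unsigned int nr_ctx;
};

static int hctx_model_init(struct hctx_model *h, unsigned int nr_ctx)
{
	unsigned int i, d;

	h->nr_ctx = nr_ctx;
	h->kcqs = calloc(nr_ctx, sizeof(*h->kcqs));
	if (!h->kcqs)
		return -1;
	for (i = 0; i < nr_ctx; i++)
		pthread_mutex_init(&h->kcqs[i].lock, NULL);
	for (d = 0; d < NR_DOMAINS; d++) {
		h->nonempty[d] = calloc(nr_ctx, sizeof(bool));
		if (!h->nonempty[d])
			return -1;        /* error unwinding omitted for brevity */
	}
	return 0;
}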
@@ -99,6 +112,8 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
+	struct kyber_ctx_queue *kcqs;
+	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
@@ -107,10 +122,8 @@ struct kyber_hctx_data {
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key);
 
-static int rq_sched_domain(const struct request *rq)
+static unsigned int kyber_sched_domain(unsigned int op)
 {
-	unsigned int op = rq->cmd_flags;
-
 	if ((op & REQ_OP_MASK) == REQ_OP_READ)
 		return KYBER_READ;
 	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
@@ -284,6 +297,11 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
 	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
 }
 
+static int kyber_bucket_fn(const struct request *rq)
+{
+	return kyber_sched_domain(rq->cmd_flags);
+}
+
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
 	struct kyber_queue_data *kqd;
@@ -297,7 +315,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 		goto err;
 	kqd->q = q;
 
-	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
 					  KYBER_NUM_DOMAINS, kqd);
 	if (!kqd->cb)
 		goto err_kqd;
@@ -376,6 +394,15 @@ static void kyber_exit_sched(struct elevator_queue *e)
 	kfree(kqd);
 }
 
+static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
+{
+	unsigned int i;
+
+	spin_lock_init(&kcq->lock);
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		INIT_LIST_HEAD(&kcq->rq_list[i]);
+}
+
 static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
@@ -386,6 +413,24 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	if (!khd)
 		return -ENOMEM;
 
+	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
+				       sizeof(struct kyber_ctx_queue),
+				       GFP_KERNEL, hctx->numa_node);
+	if (!khd->kcqs)
+		goto err_khd;
+
+	for (i = 0; i < hctx->nr_ctx; i++)
+		kyber_ctx_queue_init(&khd->kcqs[i]);
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
+				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+			while (--i >= 0)
+				sbitmap_free(&khd->kcq_map[i]);
+			goto err_kcqs;
+		}
+	}
+
 	spin_lock_init(&khd->lock);
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
@@ -405,10 +450,22 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 					kqd->async_depth);
 
 	return 0;
+
+err_kcqs:
+	kfree(khd->kcqs);
+err_khd:
+	kfree(khd);
+	return -ENOMEM;
 }
 
 static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	int i;
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		sbitmap_free(&khd->kcq_map[i]);
+	kfree(khd->kcqs);
 	kfree(hctx->sched_data);
 }
 
@@ -430,7 +487,7 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 
 	nr = rq_get_domain_token(rq);
 	if (nr != -1) {
-		sched_domain = rq_sched_domain(rq);
+		sched_domain = kyber_sched_domain(rq->cmd_flags);
 		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
 				    rq->mq_ctx->cpu);
 	}
@@ -449,11 +506,51 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 	}
 }
 
+static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
+	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
+	struct list_head *rq_list = &kcq->rq_list[sched_domain];
+	bool merged;
+
+	spin_lock(&kcq->lock);
+	merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio);
+	spin_unlock(&kcq->lock);
+	blk_mq_put_ctx(ctx);
+
+	return merged;
+}
+
 static void kyber_prepare_request(struct request *rq, struct bio *bio)
 {
 	rq_set_domain_token(rq, -1);
 }
 
+static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
+				  struct list_head *rq_list, bool at_head)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct request *rq, *next;
+
+	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
+		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
+		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+		struct list_head *head = &kcq->rq_list[sched_domain];
+
+		spin_lock(&kcq->lock);
+		if (at_head)
+			list_move(&rq->queuelist, head);
+		else
+			list_move_tail(&rq->queuelist, head);
+		sbitmap_set_bit(&khd->kcq_map[sched_domain],
+				rq->mq_ctx->index_hw);
+		blk_mq_sched_request_inserted(rq);
+		spin_unlock(&kcq->lock);
+	}
+}
+
 static void kyber_finish_request(struct request *rq)
 {
 	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
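Building on the hctx_model sketch shown after the first hunk (same invented names, same caveat that it is only a model), this is roughly what the insert path in this hunk does: take only the small per-context lock, stage the request on that context's list for its scheduling domain, and mark the (domain, context) slot non-empty, the way kyber_insert_requests() calls sbitmap_set_bit() under kcq->lock. The merge path in kyber_bio_merge() takes the same lock and scans only that one short list.

/* Builds on the hctx_model sketch above; illustrative only. */
static void model_insert(struct hctx_model *h, unsigned int index_hw,
			 unsigned int domain, struct rq_node *rq)
{
	struct ctx_queue *kcq = &h->kcqs[index_hw];

	pthread_mutex_lock(&kcq->lock);
	rq->next = kcq->head[domain];            /* stage on this ctx's domain list */
	kcq->head[domain] = rq;
	h->nonempty[domain][index_hw] = true;    /* like sbitmap_set_bit(&kcq_map[d], index_hw) */
	pthread_mutex_unlock(&kcq->lock);
}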
@@ -472,7 +569,7 @@ static void kyber_completed_request(struct request *rq)
 	 * Check if this request met our latency goal. If not, quickly gather
 	 * some statistics and start throttling.
 	 */
-	sched_domain = rq_sched_domain(rq);
+	sched_domain = kyber_sched_domain(rq->cmd_flags);
 	switch (sched_domain) {
 	case KYBER_READ:
 		target = kqd->read_lat_nsec;
@@ -498,19 +595,38 @@ static void kyber_completed_request(struct request *rq)
 		blk_stat_activate_msecs(kqd->cb, 10);
 }
 
-static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
-				  struct blk_mq_hw_ctx *hctx)
+struct flush_kcq_data {
+	struct kyber_hctx_data *khd;
+	unsigned int sched_domain;
+	struct list_head *list;
+};
+
+static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
 {
-	LIST_HEAD(rq_list);
-	struct request *rq, *next;
+	struct flush_kcq_data *flush_data = data;
+	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];
 
-	blk_mq_flush_busy_ctxs(hctx, &rq_list);
-	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-		unsigned int sched_domain;
+	spin_lock(&kcq->lock);
+	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
+			      flush_data->list);
+	sbitmap_clear_bit(sb, bitnr);
+	spin_unlock(&kcq->lock);
 
-		sched_domain = rq_sched_domain(rq);
-		list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
-	}
+	return true;
+}
+
+static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
+				  unsigned int sched_domain,
+				  struct list_head *list)
+{
+	struct flush_kcq_data data = {
+		.khd = khd,
+		.sched_domain = sched_domain,
+		.list = list,
+	};
+
+	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
+			     flush_busy_kcq, &data);
 }
 
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
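Continuing the same userspace model, the flush side mirrors flush_busy_kcq()/kyber_flush_busy_kcqs(): visit only the contexts whose non-empty flag is set for the wanted domain, move their staged requests to the caller's list under the per-context lock, and clear the flag. In the kernel this walk is driven by sbitmap_for_each_set() over kcq_map; the toy list handling below does not preserve request order.

/* Builds on the hctx_model/model_insert sketches above; illustrative only. */
static struct rq_node *model_flush(struct hctx_model *h, unsigned int domain)
{
	struct rq_node *out = NULL;
	unsigned int i;

	for (i = 0; i < h->nr_ctx; i++) {
		struct ctx_queue *kcq = &h->kcqs[i];

		if (!h->nonempty[domain][i])     /* cheap skip, like an unset kcq_map bit */
			continue;

		pthread_mutex_lock(&kcq->lock);
		while (kcq->head[domain]) {      /* move this ctx's staged requests out */
			struct rq_node *rq = kcq->head[domain];

			kcq->head[domain] = rq->next;
			rq->next = out;
			out = rq;
		}
		h->nonempty[domain][i] = false;  /* like sbitmap_clear_bit() */
		pthread_mutex_unlock(&kcq->lock);
	}
	return out;
}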
@@ -573,26 +689,23 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 static struct request *
 kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
 			  struct kyber_hctx_data *khd,
-			  struct blk_mq_hw_ctx *hctx,
-			  bool *flushed)
+			  struct blk_mq_hw_ctx *hctx)
 {
 	struct list_head *rqs;
 	struct request *rq;
 	int nr;
 
 	rqs = &khd->rqs[khd->cur_domain];
-	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 
 	/*
-	 * If there wasn't already a pending request and we haven't flushed the
-	 * software queues yet, flush the software queues and check again.
+	 * If we already have a flushed request, then we just need to get a
+	 * token for it. Otherwise, if there are pending requests in the kcqs,
+	 * flush the kcqs, but only if we can get a token. If not, we should
+	 * leave the requests in the kcqs so that they can be merged. Note that
+	 * khd->lock serializes the flushes, so if we observed any bit set in
+	 * the kcq_map, we will always get a request.
 	 */
-	if (!rq && !*flushed) {
-		kyber_flush_busy_ctxs(khd, hctx);
-		*flushed = true;
-		rq = list_first_entry_or_null(rqs, struct request, queuelist);
-	}
-
+	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 	if (rq) {
 		nr = kyber_get_domain_token(kqd, khd, hctx);
 		if (nr >= 0) {
@@ -601,6 +714,16 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
 			list_del_init(&rq->queuelist);
 			return rq;
 		}
+	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
+		nr = kyber_get_domain_token(kqd, khd, hctx);
+		if (nr >= 0) {
+			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
+			rq = list_first_entry(rqs, struct request, queuelist);
+			khd->batching++;
+			rq_set_domain_token(rq, nr);
+			list_del_init(&rq->queuelist);
+			return rq;
+		}
 	}
 
 	/* There were either no pending requests or no tokens. */
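Finally, a sketch of the dispatch-side ordering this hunk establishes, again in terms of the userspace model and with the domain-token machinery reduced to a boolean the caller supplies: check whether any context has staged work, flush only once a token is in hand, and otherwise leave everything staged so later bios can still merge against it. The short main() shows the model end to end.

/* Builds on the model_insert/model_flush sketches above; illustrative only. */
static struct rq_node *model_dispatch(struct hctx_model *h, unsigned int domain,
				      bool have_token)
{
	unsigned int i;
	bool any = false;

	for (i = 0; i < h->nr_ctx && !any; i++)
		any = h->nonempty[domain][i];   /* like sbitmap_any_bit_set() */

	if (!any || !have_token)
		return NULL;                    /* nothing staged, or no token yet */

	return model_flush(h, domain);          /* safe to drain now */
}

int main(void)
{
	struct hctx_model h;
	struct rq_node rq = { 0 };

	if (hctx_model_init(&h, 4))
		return 1;
	model_insert(&h, 2, 0, &rq);            /* stage one "read" on ctx 2 */
	return model_dispatch(&h, 0, true) == &rq ? 0 : 1;
}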
@@ -611,7 +734,6 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	bool flushed = false;
 	struct request *rq;
 	int i;
 
@@ -622,7 +744,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	 * from the batch.
 	 */
 	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -643,7 +765,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		else
 			khd->cur_domain++;
 
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -660,10 +782,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
 	int i;
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
-		if (!list_empty_careful(&khd->rqs[i]))
+		if (!list_empty_careful(&khd->rqs[i]) ||
+		    sbitmap_any_bit_set(&khd->kcq_map[i]))
 			return true;
 	}
-	return sbitmap_any_bit_set(&hctx->ctx_map);
+
+	return false;
 }
 
 #define KYBER_LAT_SHOW_STORE(op)				\
@@ -834,7 +958,9 @@ static struct elevator_type kyber_sched = {
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
 		.limit_depth = kyber_limit_depth,
+		.bio_merge = kyber_bio_merge,
 		.prepare_request = kyber_prepare_request,
+		.insert_requests = kyber_insert_requests,
 		.finish_request = kyber_finish_request,
 		.requeue_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,