@@ -374,64 +374,17 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
-static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
-					  struct blk_mq_hw_ctx *hctx,
-					  unsigned int hctx_idx)
-{
-	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
-		hctx->sched_tags = q->sched_shared_tags;
-		return 0;
-	}
-
-	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
-						    q->nr_requests);
-
-	if (!hctx->sched_tags)
-		return -ENOMEM;
-	return 0;
-}
-
-static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
-{
-	blk_mq_free_rq_map(queue->sched_shared_tags);
-	queue->sched_shared_tags = NULL;
-}
-
 /* called in queue's release handler, tagset has gone away */
 static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (hctx->sched_tags) {
-			if (!blk_mq_is_shared_tags(flags))
-				blk_mq_free_rq_map(hctx->sched_tags);
-			hctx->sched_tags = NULL;
-		}
-	}
+	queue_for_each_hw_ctx(q, hctx, i)
+		hctx->sched_tags = NULL;
 
 	if (blk_mq_is_shared_tags(flags))
-		blk_mq_exit_sched_shared_tags(q);
-}
-
-static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
-{
-	struct blk_mq_tag_set *set = queue->tag_set;
-
-	/*
-	 * Set initial depth at max so that we don't need to reallocate for
-	 * updating nr_requests.
-	 */
-	queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
-						BLK_MQ_NO_HCTX_IDX,
-						MAX_SCHED_RQ);
-	if (!queue->sched_shared_tags)
-		return -ENOMEM;
-
-	blk_mq_tag_update_sched_shared_tags(queue);
-
-	return 0;
+		q->sched_shared_tags = NULL;
 }
 
 void blk_mq_sched_reg_debugfs(struct request_queue *q)
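The next hunk builds on a small struct elevator_tags container that this file only uses and never defines. Below is a minimal sketch of its layout, inferred purely from the allocation size and the et->tags[i], et->nr_requests and et->nr_hw_queues accesses in the new code; the real definition lives in a block-layer header outside this diff, and the field types here are assumptions.

struct elevator_tags {
	/* number of hardware queues the tag maps below were sized for */
	unsigned int nr_hw_queues;
	/* per-hw-queue scheduler depth, copied into q->nr_requests */
	unsigned int nr_requests;
	/* one tag map per hw queue; shared-tags setups use only tags[0] */
	struct blk_mq_tags *tags[];
};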
@@ -458,45 +411,174 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q)
 	mutex_unlock(&q->debugfs_mutex);
 }
 
+void blk_mq_free_sched_tags(struct elevator_tags *et,
+		struct blk_mq_tag_set *set)
+{
+	unsigned long i;
+
+	/* Shared tags are stored at index 0 in @tags. */
+	if (blk_mq_is_shared_tags(set->flags))
+		blk_mq_free_map_and_rqs(set, et->tags[0], BLK_MQ_NO_HCTX_IDX);
+	else {
+		for (i = 0; i < et->nr_hw_queues; i++)
+			blk_mq_free_map_and_rqs(set, et->tags[i], i);
+	}
+
+	kfree(et);
+}
+
+void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+		struct blk_mq_tag_set *set)
+{
+	struct request_queue *q;
+	struct elevator_tags *et;
+
+	lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		/*
+		 * Accessing q->elevator without holding q->elevator_lock is
+		 * safe because we're holding here set->update_nr_hwq_lock in
+		 * the writer context. So, scheduler update/switch code (which
+		 * acquires the same lock but in the reader context) can't run
+		 * concurrently.
+		 */
+		if (q->elevator) {
+			et = xa_load(et_table, q->id);
+			if (unlikely(!et))
+				WARN_ON_ONCE(1);
+			else
+				blk_mq_free_sched_tags(et, set);
+		}
+	}
+}
+
+struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
+		unsigned int nr_hw_queues)
+{
+	unsigned int nr_tags;
+	int i;
+	struct elevator_tags *et;
+	gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+	if (blk_mq_is_shared_tags(set->flags))
+		nr_tags = 1;
+	else
+		nr_tags = nr_hw_queues;
+
+	et = kmalloc(sizeof(struct elevator_tags) +
+			nr_tags * sizeof(struct blk_mq_tags *), gfp);
+	if (!et)
+		return NULL;
+	/*
+	 * Default to double of smaller one between hw queue_depth and
+	 * 128, since we don't split into sync/async like the old code
+	 * did. Additionally, this is a per-hw queue depth.
+	 */
+	et->nr_requests = 2 * min_t(unsigned int, set->queue_depth,
+			BLKDEV_DEFAULT_RQ);
+	et->nr_hw_queues = nr_hw_queues;
+
+	if (blk_mq_is_shared_tags(set->flags)) {
+		/* Shared tags are stored at index 0 in @tags. */
+		et->tags[0] = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX,
+				MAX_SCHED_RQ);
+		if (!et->tags[0])
+			goto out;
+	} else {
+		for (i = 0; i < et->nr_hw_queues; i++) {
+			et->tags[i] = blk_mq_alloc_map_and_rqs(set, i,
+					et->nr_requests);
+			if (!et->tags[i])
+				goto out_unwind;
+		}
+	}
+
+	return et;
+out_unwind:
+	while (--i >= 0)
+		blk_mq_free_map_and_rqs(set, et->tags[i], i);
+out:
+	kfree(et);
+	return NULL;
+}
+
+int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+		struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
+{
+	struct request_queue *q;
+	struct elevator_tags *et;
+	gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+	lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		/*
+		 * Accessing q->elevator without holding q->elevator_lock is
+		 * safe because we're holding here set->update_nr_hwq_lock in
+		 * the writer context. So, scheduler update/switch code (which
+		 * acquires the same lock but in the reader context) can't run
+		 * concurrently.
+		 */
+		if (q->elevator) {
+			et = blk_mq_alloc_sched_tags(set, nr_hw_queues);
+			if (!et)
+				goto out_unwind;
+			if (xa_insert(et_table, q->id, et, gfp))
+				goto out_free_tags;
+		}
+	}
+	return 0;
+out_free_tags:
+	blk_mq_free_sched_tags(et, set);
+out_unwind:
+	list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
+		if (q->elevator) {
+			et = xa_load(et_table, q->id);
+			if (et)
+				blk_mq_free_sched_tags(et, set);
+		}
+	}
+	return -ENOMEM;
+}
+
 /* caller must have a reference to @e, will grab another one if successful */
-int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e,
+		struct elevator_tags *et)
 {
 	unsigned int flags = q->tag_set->flags;
 	struct blk_mq_hw_ctx *hctx;
 	struct elevator_queue *eq;
 	unsigned long i;
 	int ret;
 
-	/*
-	 * Default to double of smaller one between hw queue_depth and 128,
-	 * since we don't split into sync/async like the old code did.
-	 * Additionally, this is a per-hw queue depth.
-	 */
-	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
-				   BLKDEV_DEFAULT_RQ);
+	eq = elevator_alloc(q, e, et);
+	if (!eq)
+		return -ENOMEM;
+
+	q->nr_requests = et->nr_requests;
 
 	if (blk_mq_is_shared_tags(flags)) {
-		ret = blk_mq_init_sched_shared_tags(q);
-		if (ret)
-			return ret;
+		/* Shared tags are stored at index 0 in @et->tags. */
+		q->sched_shared_tags = et->tags[0];
+		blk_mq_tag_update_sched_shared_tags(q);
 	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
-		if (ret)
-			goto err_free_map_and_rqs;
+		if (blk_mq_is_shared_tags(flags))
+			hctx->sched_tags = q->sched_shared_tags;
+		else
+			hctx->sched_tags = et->tags[i];
 	}
 
-	ret = e->ops.init_sched(q, e);
+	ret = e->ops.init_sched(q, eq);
 	if (ret)
-		goto err_free_map_and_rqs;
+		goto out;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (e->ops.init_hctx) {
 			ret = e->ops.init_hctx(hctx, i);
 			if (ret) {
-				eq = q->elevator;
-				blk_mq_sched_free_rqs(q);
 				blk_mq_exit_sched(q, eq);
 				kobject_put(&eq->kobj);
 				return ret;
@@ -505,10 +587,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	}
 	return 0;
 
-err_free_map_and_rqs:
-	blk_mq_sched_free_rqs(q);
+out:
 	blk_mq_sched_tags_teardown(q, flags);
-
+	kobject_put(&eq->kobj);
 	q->elevator = NULL;
 	return ret;
 }
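For context, here is a hedged sketch of how the batch helpers above are meant to pair up around an nr_hw_queues update. The real caller is not part of this diff; the wrapper example_resize_hw_queues(), the example_reconfigure() step, the include location of the new prototypes, and the assumption that update_nr_hwq_lock is an rw_semaphore (suggested by the lockdep_assert_held_write() calls above) are all illustrative, not taken from the commit.

#include <linux/blk-mq.h>
#include <linux/rwsem.h>
#include <linux/xarray.h>
#include "blk-mq-sched.h"	/* assumed home of the new prototypes */

/* Stand-in for the real hw-queue reconfiguration step, not from this commit. */
static int example_reconfigure(struct blk_mq_tag_set *set, unsigned int nr);

/* Hypothetical wrapper showing the intended alloc/free pairing. */
static int example_resize_hw_queues(struct blk_mq_tag_set *set,
		unsigned int new_nr_hw_queues)
{
	struct xarray et_table;	/* maps q->id to struct elevator_tags * */
	int ret;

	xa_init(&et_table);
	down_write(&set->update_nr_hwq_lock);

	/* Pre-allocate scheduler tags for every queue that has an elevator. */
	ret = blk_mq_alloc_sched_tags_batch(&et_table, set, new_nr_hw_queues);
	if (ret)
		goto out_unlock;

	/*
	 * Reconfigure the hardware queues (hypothetical step). On success the
	 * elevator switch path hands each queue's entry from @et_table to
	 * blk_mq_init_sched(); on failure nothing consumed the new maps, so
	 * release them all here.
	 */
	ret = example_reconfigure(set, new_nr_hw_queues);
	if (ret)
		blk_mq_free_sched_tags_batch(&et_table, set);

out_unlock:
	up_write(&set->update_nr_hwq_lock);
	xa_destroy(&et_table);	/* frees only xarray nodes, not the entries */
	return ret;
}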