Skip to content

Commit 9aab74e

Browse files
author
Ming Lei
committed
blk-mq: Defer freeing of tags page_list to SRCU callback
JIRA: https://issues.redhat.com/browse/RHEL-123480 commit ad0d05d Author: Ming Lei <ming.lei@redhat.com> Date: Sat Aug 30 10:18:21 2025 +0800 blk-mq: Defer freeing of tags page_list to SRCU callback Tag iterators can race with the freeing of the request pages (tags->page_list), potentially leading to use-after-free issues. Defer the freeing of the page list and the tags structure itself until after an SRCU grace period has passed. This ensures that any concurrent tag iterators have completed before the memory is released. This way, we can replace the big tags->lock in the tag iterator code path with SRCU to solve the issue. This is achieved by: - Adding a new `srcu_struct tags_srcu` to `blk_mq_tag_set` to protect tag map iteration. - Adding an `rcu_head` to `struct blk_mq_tags` to be used with `call_srcu`. - Moving the page list freeing logic and the `kfree(tags)` call into a new callback function, `blk_mq_free_tags_callback`. - In `blk_mq_free_tags`, invoking `call_srcu` to schedule the new callback for deferred execution. The read-side protection for the tag iterators will be added in a subsequent patch. Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Ming Lei <ming.lei@redhat.com>
1 parent eda11d6 commit 9aab74e

File tree

3 files changed

+38
-14
lines changed

3 files changed

+38
-14
lines changed

block/blk-mq-tag.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
*/
99
#include <linux/kernel.h>
1010
#include <linux/module.h>
11+
#include <linux/slab.h>
12+
#include <linux/mm.h>
13+
#include <linux/kmemleak.h>
1114

1215
#include <linux/delay.h>
1316
#include "blk.h"
@@ -576,11 +579,30 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
576579
return NULL;
577580
}
578581

582+
/*
 * SRCU callback that performs the deferred release of a tag map.
 *
 * @head: the rcu_head embedded in the struct blk_mq_tags being freed; the
 *        owning structure is recovered with container_of().
 *
 * Drains tags->page_list, dropping the kmemleak object and freeing each
 * page block with the order stored in page->private, then frees the tags
 * structure itself. Queued from blk_mq_free_tags() through call_srcu().
 */
static void blk_mq_free_tags_callback(struct rcu_head *head)
583+
{
584+
struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags,
585+
rcu_head);
586+
struct page *page;
587+
588+
while (!list_empty(&tags->page_list)) {
589+
page = list_first_entry(&tags->page_list, struct page, lru);
590+
list_del_init(&page->lru);
591+
/*
592+
* Remove kmemleak object previously allocated in
593+
* blk_mq_alloc_rqs().
594+
*/
595+
kmemleak_free(page_address(page));
596+
__free_pages(page, page->private);
597+
}
598+
kfree(tags);
599+
}
600+
579601
/*
 * Release a tag map belonging to @set.
 *
 * @set:  tag set whose tags_srcu is used to defer the final free.
 * @tags: tag map to release.
 *
 * The two sbitmap queues are freed immediately. The page list and the
 * tags structure itself are not freed here; they are handed to
 * blk_mq_free_tags_callback() via call_srcu().
 */
void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
580602
{
581603
sbitmap_queue_free(&tags->bitmap_tags);
582604
sbitmap_queue_free(&tags->breserved_tags);
583-
kfree(tags);
605+
/* Defer freeing of the page list and of @tags to the SRCU callback. */
call_srcu(&set->tags_srcu, &tags->rcu_head, blk_mq_free_tags_callback);
584606
}
585607

586608
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,

block/blk-mq.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3454,7 +3454,6 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
34543454
unsigned int hctx_idx)
34553455
{
34563456
struct blk_mq_tags *drv_tags;
3457-
struct page *page;
34583457

34593458
if (list_empty(&tags->page_list))
34603459
return;
@@ -3478,17 +3477,10 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
34783477
}
34793478

34803479
blk_mq_clear_rq_mapping(drv_tags, tags);
3481-
3482-
while (!list_empty(&tags->page_list)) {
3483-
page = list_first_entry(&tags->page_list, struct page, lru);
3484-
list_del_init(&page->lru);
3485-
/*
3486-
* Remove kmemleak object previously allocated in
3487-
* blk_mq_alloc_rqs().
3488-
*/
3489-
kmemleak_free(page_address(page));
3490-
__free_pages(page, page->private);
3491-
}
3480+
/*
3481+
* Free request pages in SRCU callback, which is called from
3482+
* blk_mq_free_tags().
3483+
*/
34923484
}
34933485

34943486
void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
@@ -4834,6 +4826,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48344826
if (ret)
48354827
goto out_free_srcu;
48364828
}
4829+
ret = init_srcu_struct(&set->tags_srcu);
4830+
if (ret)
4831+
goto out_cleanup_srcu;
48374832

48384833
init_rwsem(&set->update_nr_hwq_lock);
48394834

@@ -4842,7 +4837,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48424837
sizeof(struct blk_mq_tags *), GFP_KERNEL,
48434838
set->numa_node);
48444839
if (!set->tags)
4845-
goto out_cleanup_srcu;
4840+
goto out_cleanup_tags_srcu;
48464841

48474842
for (i = 0; i < set->nr_maps; i++) {
48484843
set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
@@ -4871,6 +4866,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48714866
}
48724867
kfree(set->tags);
48734868
set->tags = NULL;
4869+
out_cleanup_tags_srcu:
4870+
cleanup_srcu_struct(&set->tags_srcu);
48744871
out_cleanup_srcu:
48754872
if (set->flags & BLK_MQ_F_BLOCKING)
48764873
cleanup_srcu_struct(set->srcu);
@@ -4916,6 +4913,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
49164913

49174914
kfree(set->tags);
49184915
set->tags = NULL;
4916+
4917+
srcu_barrier(&set->tags_srcu);
4918+
cleanup_srcu_struct(&set->tags_srcu);
49194919
if (set->flags & BLK_MQ_F_BLOCKING) {
49204920
cleanup_srcu_struct(set->srcu);
49214921
kfree(set->srcu);

include/linux/blk-mq.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ struct blk_mq_tag_set {
542542
struct mutex tag_list_lock;
543543
struct list_head tag_list;
544544
struct srcu_struct *srcu;
545+
struct srcu_struct tags_srcu;
545546

546547
struct rw_semaphore update_nr_hwq_lock;
547548
RH_KABI_RESERVE(1)
@@ -797,6 +798,7 @@ struct blk_mq_tags {
797798
* request pool
798799
*/
799800
spinlock_t lock;
801+
struct rcu_head rcu_head;
800802
};
801803

802804
static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,

0 commit comments

Comments
 (0)