
Commit 6c0ed2b

Authored and committed by CKI KWF Bot
Merge: Backport important patches and bug fixes from the 6.15 kernel

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1151
JIRA: https://issues.redhat.com/browse/RHEL-95382

## Summary of Changes

Backport fixes from the 6.15 kernel.

Omitted-Fix: d681107 ("nvme-multipath: fix suspicious RCU usage warning")

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Approved-by: John Meneghini <jmeneghi@redhat.com>
Approved-by: Ewan D. Milne <emilne@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
Parents: db1dde3 + 7934f0e

15 files changed: 314 additions, 65 deletions

drivers/nvme/host/Kconfig (1 addition, 0 deletions)

@@ -97,6 +97,7 @@ config NVME_TCP_TLS
         depends on NVME_TCP
         select NET_HANDSHAKE
         select KEYS
+        select TLS
         help
           Enables TLS encryption for NVMe TCP using the netlink handshake API.

The added `select TLS` ensures the in-kernel TLS implementation is built together with the handshake support, so enabling NVME_TCP_TLS can no longer produce a kernel that lacks the TLS layer the completed handshake relies on.

drivers/nvme/host/core.c (16 additions, 5 deletions)
@@ -4011,6 +4011,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
         if (!nvme_ns_head_multipath(ns->head))
                 nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+
+        nvme_mpath_remove_sysfs_link(ns);
+
         del_gendisk(ns->disk);
 
         mutex_lock(&ns->ctrl->namespaces_lock);

@@ -4248,6 +4251,15 @@ static void nvme_scan_work(struct work_struct *work)
                 nvme_scan_ns_sequential(ctrl);
         }
         mutex_unlock(&ctrl->scan_lock);
+
+        /* Requeue if we have missed AENs */
+        if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events))
+                nvme_queue_scan(ctrl);
+#ifdef CONFIG_NVME_MULTIPATH
+        else if (ctrl->ana_log_buf)
+                /* Re-read the ANA log page to not miss updates */
+                queue_work(nvme_wq, &ctrl->ana_work);
+#endif
 }
 
 /*
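
The second hunk closes a window where a namespace-change AEN arriving mid-scan would be lost. A userspace sketch of that "re-test the event bit after the work finishes" pattern, with hypothetical stand-ins for ctrl->events and nvme_queue_scan():

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NS_CHANGED_BIT 0UL

/* Hypothetical stand-in for ctrl->events. */
static atomic_ulong events;

static bool ns_changed_pending(void)
{
        /* Mirrors test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events). */
        return atomic_load(&events) & (1UL << NS_CHANGED_BIT);
}

static void scan_work(void)
{
        /* ... scan namespaces; an AEN may arrive concurrently ... */
        atomic_fetch_or(&events, 1UL << NS_CHANGED_BIT); /* simulated AEN */

        /* Requeue if we have missed AENs (as in the hunk above). */
        if (ns_changed_pending())
                puts("requeue scan");
}

int main(void)
{
        scan_work();
        return 0;
}
```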
@@ -4422,11 +4434,9 @@ static void nvme_fw_act_work(struct work_struct *work)
         nvme_auth_stop(ctrl);
 
         if (ctrl->mtfa)
-                fw_act_timeout = jiffies +
-                                msecs_to_jiffies(ctrl->mtfa * 100);
+                fw_act_timeout = jiffies + msecs_to_jiffies(ctrl->mtfa * 100);
         else
-                fw_act_timeout = jiffies +
-                                msecs_to_jiffies(admin_timeout * 1000);
+                fw_act_timeout = jiffies + secs_to_jiffies(admin_timeout);
 
         nvme_quiesce_io_queues(ctrl);
         while (nvme_ctrl_pp_status(ctrl)) {
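
MTFA is reported by the controller in units of 100 ms, which is why the first branch keeps msecs_to_jiffies(ctrl->mtfa * 100); the second branch replaces msecs_to_jiffies(admin_timeout * 1000) with the equivalent secs_to_jiffies(admin_timeout). A userspace sketch of that equivalence, assuming HZ = 1000 (the helpers below are simplified stand-ins for the real kernel APIs and ignore overflow and rounding):

```c
#include <assert.h>
#include <stdio.h>

#define HZ 1000UL	/* assumed tick rate for this sketch */

/* Simplified stand-ins for the kernel helpers (no overflow handling). */
static unsigned long msecs_to_jiffies(unsigned long ms) { return ms * HZ / 1000; }
static unsigned long secs_to_jiffies(unsigned long s)   { return s * HZ; }

int main(void)
{
        unsigned long admin_timeout = 60;	/* seconds */
        unsigned long mtfa = 20;		/* units of 100 ms, i.e. 2 s */

        /* The rewrite changes spelling, not the computed deadline. */
        assert(secs_to_jiffies(admin_timeout) ==
               msecs_to_jiffies(admin_timeout * 1000));

        printf("mtfa deadline:  +%lu jiffies\n", msecs_to_jiffies(mtfa * 100));
        printf("admin deadline: +%lu jiffies\n", secs_to_jiffies(admin_timeout));
        return 0;
}
```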
@@ -4439,7 +4449,8 @@ static void nvme_fw_act_work(struct work_struct *work)
                 msleep(100);
         }
 
-        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
+        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) ||
+            !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
                 return;
 
         nvme_unquiesce_io_queues(ctrl);

drivers/nvme/host/fc.c (2 additions, 2 deletions)
@@ -2858,7 +2858,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
         unsigned int nr_io_queues;
         int ret;
 
-        nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+        nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
                            ctrl->lport->ops->max_hw_queues);
         ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
         if (ret) {

@@ -2912,7 +2912,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
         unsigned int nr_io_queues;
         int ret;
 
-        nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+        nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(),
                            ctrl->lport->ops->max_hw_queues);
         ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
         if (ret) {
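
min3() comes from the kernel's <linux/minmax.h> and is a drop-in, more readable replacement for the nested min(). A standalone sketch of the clamping logic in plain C (without the kernel's type-checking macros; the input values are made up):

```c
#include <stdio.h>

/* Plain-C equivalent of min3(a, b, c) == min(min(a, b), c). */
static unsigned int min3u(unsigned int a, unsigned int b, unsigned int c)
{
        unsigned int m = a < b ? a : b;

        return m < c ? m : c;
}

int main(void)
{
        /* opts->nr_io_queues, num_online_cpus(), max_hw_queues stand-ins */
        unsigned int nr_io_queues = min3u(32, 16, 8);

        printf("nr_io_queues = %u\n", nr_io_queues);	/* prints 8 */
        return 0;
}
```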

drivers/nvme/host/ioctl.c (37 additions, 29 deletions)
@@ -114,7 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
 
 static int nvme_map_user_request(struct request *req, u64 ubuffer,
                 unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-                struct io_uring_cmd *ioucmd, unsigned int flags)
+                struct iov_iter *iter, unsigned int flags)
 {
         struct request_queue *q = req->q;
         struct nvme_ns *ns = q->queuedata;

@@ -128,36 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
         if (!nvme_ctrl_sgl_supported(ctrl))
                 dev_warn_once(ctrl->device, "using unchecked data buffer\n");
         if (has_metadata) {
-                if (!supports_metadata) {
-                        ret = -EINVAL;
-                        goto out;
-                }
+                if (!supports_metadata)
+                        return -EINVAL;
+
                 if (!nvme_ctrl_meta_sgl_supported(ctrl))
                         dev_warn_once(ctrl->device,
                                       "using unchecked metadata buffer\n");
         }
 
-        if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
-                struct iov_iter iter;
-
-                /* fixedbufs is only for non-vectored io */
-                if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
-                        ret = -EINVAL;
-                        goto out;
-                }
-                ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
-                                rq_data_dir(req), &iter, ioucmd);
-                if (ret < 0)
-                        goto out;
-                ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
-        } else {
+        if (iter)
+                ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+        else
                 ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
                                 bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
                                 0, rq_data_dir(req));
-        }
 
         if (ret)
-                goto out;
+                return ret;
 
         bio = req->bio;
         if (bdev)

@@ -174,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 out_unmap:
         if (bio)
                 blk_rq_unmap_user(bio);
-out:
-        blk_mq_free_request(req);
         return ret;
 }
 

@@ -200,7 +185,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
                 ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
                                 meta_len, NULL, flags);
                 if (ret)
-                        return ret;
+                        goto out_free_req;
         }
 
         bio = req->bio;

@@ -216,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 
         if (effects)
                 nvme_passthru_end(ctrl, ns, effects, cmd, ret);
+        return ret;
 
+out_free_req:
+        blk_mq_free_request(req);
         return ret;
 }
 

@@ -467,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
         struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
         struct nvme_uring_data d;
         struct nvme_command c;
+        struct iov_iter iter;
+        struct iov_iter *map_iter = NULL;
         struct request *req;
         blk_opf_t rq_flags = REQ_ALLOC_CACHE;
         blk_mq_req_flags_t blk_flags = 0;

@@ -502,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
         d.metadata_len = READ_ONCE(cmd->metadata_len);
         d.timeout_ms = READ_ONCE(cmd->timeout_ms);
 
+        if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+                /* fixedbufs is only for non-vectored io */
+                if (vec)
+                        return -EINVAL;
+
+                ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+                        nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+                        issue_flags);
+                if (ret < 0)
+                        return ret;
+
+                map_iter = &iter;
+        }
+
         if (issue_flags & IO_URING_F_NONBLOCK) {
                 rq_flags |= REQ_NOWAIT;
                 blk_flags = BLK_MQ_REQ_NOWAIT;

@@ -514,12 +518,12 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                 return PTR_ERR(req);
         req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
 
-        if (d.addr && d.data_len) {
-                ret = nvme_map_user_request(req, d.addr,
-                        d.data_len, nvme_to_user_ptr(d.metadata),
-                        d.metadata_len, ioucmd, vec);
+        if (d.data_len) {
+                ret = nvme_map_user_request(req, d.addr, d.data_len,
+                        nvme_to_user_ptr(d.metadata), d.metadata_len,
+                        map_iter, vec);
                 if (ret)
-                        return ret;
+                        goto out_free_req;
         }
 
         /* to free bio on completion, as req->bio will be null at that time */

@@ -529,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
         req->end_io = nvme_uring_cmd_end_io;
         blk_execute_rq_nowait(req, false);
         return -EIOCBQUEUED;
+
+out_free_req:
+        blk_mq_free_request(req);
+        return ret;
 }
 
 static bool is_ctrl_ioctl(unsigned int cmd)
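
The common thread in this file is an error-ownership change: nvme_map_user_request() no longer frees the request on failure, and the io_uring fixed-buffer import moves out of it into nvme_uring_cmd_io(); each caller that allocated the request now cleans up via its own out_free_req label. A minimal userspace sketch of that pattern, with hypothetical names and malloc/free standing in for blk_mq request allocation:

```c
#include <stdlib.h>
#include <stdio.h>

struct request { void *payload; };

/* Maps a buffer into the request. On failure it only reports the error;
 * it never frees @req -- the caller owns the request it allocated. */
static int map_payload(struct request *req, size_t len)
{
        req->payload = malloc(len);
        return req->payload ? 0 : -1;
}

static int submit(size_t len)
{
        struct request *req = malloc(sizeof(*req));

        if (!req)
                return -1;

        if (map_payload(req, len) < 0)
                goto out_free_req;	/* single, obvious cleanup path */

        printf("submitted %zu bytes\n", len);
        free(req->payload);
        free(req);
        return 0;

out_free_req:
        free(req);
        return -1;
}

int main(void)
{
        return submit(4096) ? EXIT_FAILURE : EXIT_SUCCESS;
}
```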

drivers/nvme/host/multipath.c (139 additions, 1 deletion)
@@ -422,7 +422,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
         struct nvme_ns *ns;
 
         if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
-                return NULL;
+                return false;
 
         list_for_each_entry_srcu(ns, &head->list, siblings,
                                  srcu_read_lock_held(&head->srcu)) {
@@ -683,6 +683,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
                 kblockd_schedule_work(&head->partition_scan_work);
         }
 
+        nvme_mpath_add_sysfs_link(ns->head);
+
         mutex_lock(&head->lock);
         if (nvme_path_is_optimized(ns)) {
                 int node, srcu_idx;
@@ -765,6 +767,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
         if (nvme_state_is_live(ns->ana_state) &&
             nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
                 nvme_mpath_set_live(ns);
+        else {
+                /*
+                 * Add a sysfs link from the multipath head gendisk node to
+                 * the path device gendisk node.
+                 * If the path's ANA state is live (i.e. optimized or
+                 * non-optimized) when we allocate the ns, the link is
+                 * created from nvme_mpath_set_live() and we never fall
+                 * through to this branch. For any other ANA state,
+                 * nvme_mpath_set_live() is called only after the state
+                 * transitions to live, but we still want the link from the
+                 * head node to the path device irrespective of the path's
+                 * ANA state.
+                 * So if we reach here, the path's ANA state is not live,
+                 * and we create the link to this path from the head node
+                 * anyway, provided the head node has already come alive.
+                 */
+                if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags))
+                        nvme_mpath_add_sysfs_link(ns->head);
+        }
 }
 
 static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -952,6 +973,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
 }
 DEVICE_ATTR_RO(ana_state);
 
+static ssize_t queue_depth_show(struct device *dev,
+                struct device_attribute *attr, char *buf)
+{
+        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+
+        if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD)
+                return 0;
+
+        return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active));
+}
+DEVICE_ATTR_RO(queue_depth);
+
+static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr,
+                char *buf)
+{
+        int node, srcu_idx;
+        nodemask_t numa_nodes;
+        struct nvme_ns *current_ns;
+        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+        struct nvme_ns_head *head = ns->head;
+
+        if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA)
+                return 0;
+
+        nodes_clear(numa_nodes);
+
+        srcu_idx = srcu_read_lock(&head->srcu);
+        for_each_node(node) {
+                current_ns = srcu_dereference(head->current_path[node],
+                                &head->srcu);
+                if (ns == current_ns)
+                        node_set(node, numa_nodes);
+        }
+        srcu_read_unlock(&head->srcu, srcu_idx);
+
+        return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes));
+}
+DEVICE_ATTR_RO(numa_nodes);
+
 static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
                 struct nvme_ana_group_desc *desc, void *data)
 {
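
Both new attributes hang off the path device and read back empty unless the matching I/O policy is active (queue-depth for queue_depth, numa for numa_nodes). A userspace sketch that reads one of them; the device name nvme0c0n1 and the exact sysfs path are assumptions, substitute a real path device on your system:

```c
#include <stdio.h>

int main(void)
{
        char buf[64];
        /* Hypothetical path device; the attribute sits alongside ana_state. */
        FILE *f = fopen("/sys/block/nvme0c0n1/queue_depth", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("queue_depth: %s", buf);	/* buf already ends in '\n' */
        fclose(f);
        return 0;
}
```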
@@ -964,6 +1024,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
         return -ENXIO; /* just break out of the loop */
 }
 
+void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
+{
+        struct device *target;
+        int rc, srcu_idx;
+        struct nvme_ns *ns;
+        struct kobject *kobj;
+
+        /*
+         * Ensure that the head disk node is already added, otherwise we
+         * may get an invalid kobj for the head disk node.
+         */
+        if (!test_bit(GD_ADDED, &head->disk->state))
+                return;
+
+        kobj = &disk_to_dev(head->disk)->kobj;
+
+        /*
+         * Loop through each ns chained through head->list and create the
+         * sysfs link from the head node to the ns path node.
+         */
+        srcu_idx = srcu_read_lock(&head->srcu);
+
+        list_for_each_entry_rcu(ns, &head->list, siblings) {
+                /*
+                 * Ensure that the ns path disk node is already added,
+                 * otherwise we may get an invalid kobj name for the target.
+                 */
+                if (!test_bit(GD_ADDED, &ns->disk->state))
+                        continue;
+
+                /*
+                 * Avoid creating a link if it already exists for the given
+                 * path. When a path's ANA state transitions from optimized
+                 * to non-optimized or vice versa, nvme_mpath_set_live() is
+                 * invoked, which in turn calls this function. If the sysfs
+                 * link already exists for the given path and we attempt to
+                 * re-create it, the sysfs code warns loudly. So we check
+                 * the NVME_NS_SYSFS_ATTR_LINK flag here to ensure we do not
+                 * create a duplicate link.
+                 * test_and_set_bit() is used because it protects against
+                 * multiple nvme paths being added simultaneously.
+                 */
+                if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
+                        continue;
+
+                target = disk_to_dev(ns->disk);
+                /*
+                 * Create a sysfs link from the head gendisk kobject @kobj
+                 * to the ns path gendisk kobject @target->kobj.
+                 */
+                rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name,
+                                &target->kobj, dev_name(target));
+                if (unlikely(rc)) {
+                        dev_err(disk_to_dev(ns->head->disk),
+                                "failed to create link to %s\n",
+                                dev_name(target));
+                        clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
+                }
+        }
+
+        srcu_read_unlock(&head->srcu, srcu_idx);
+}
+
+void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
+{
+        struct device *target;
+        struct kobject *kobj;
+
+        if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
+                return;
+
+        target = disk_to_dev(ns->disk);
+        kobj = &disk_to_dev(ns->head->disk)->kobj;
+        sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name,
+                        dev_name(target));
+        clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
+}
+
 void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
 {
         if (nvme_ctrl_use_ana(ns->ctrl)) {
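
The NVME_NS_SYSFS_ATTR_LINK bit makes link creation and removal idempotent under concurrency: only the caller that flips the bit performs the sysfs operation. A compact userspace sketch of the same idiom using a C11 atomic flag (the names are stand-ins, not the kernel API):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the NVME_NS_SYSFS_ATTR_LINK flag bit. */
static atomic_bool link_created;

static void add_link(void)
{
        /* Like test_and_set_bit(): only the false->true transition creates
         * the link, so concurrent path additions cannot create duplicates. */
        if (atomic_exchange(&link_created, true))
                return;
        puts("sysfs link created");
}

static void remove_link(void)
{
        /* Mirror image: only remove a link we actually created. */
        if (!atomic_exchange(&link_created, false))
                return;
        puts("sysfs link removed");
}

int main(void)
{
        add_link();
        add_link();	/* no-op: duplicate creation avoided */
        remove_link();
        return 0;
}
```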
