Commit 97cb268

Author: CKI KWF Bot (committed)
Merge: nvme-tcp: Fix I/O queue cpu spreading for multiple controllers
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7583
JIRA: https://issues.redhat.com/browse/RHEL-76529

Fix I/O queue cpu spreading for multiple controllers

Signed-off-by: John Meneghini <jmeneghi@redhat.com>
Approved-by: Ewan D. Milne <emilne@redhat.com>
Approved-by: Chris Leech <cleech@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 225870d + 8f92d6b commit 97cb268

File tree

1 file changed: +57 −13


drivers/nvme/host/tcp.c

Lines changed: 57 additions & 13 deletions
@@ -53,6 +53,8 @@ MODULE_PARM_DESC(tls_handshake_timeout,
 		 "nvme TLS handshake timeout in seconds (default 10)");
 #endif
 
+static atomic_t nvme_tcp_cpu_queues[NR_CPUS];
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 /* lockdep can detect a circular dependency of the form
  *   sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
@@ -126,6 +128,7 @@ enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_ALLOCATED	= 0,
 	NVME_TCP_Q_LIVE		= 1,
 	NVME_TCP_Q_POLLING	= 2,
+	NVME_TCP_Q_IO_CPU_SET	= 3,
 };
 
 enum nvme_tcp_recv_state {
@@ -1648,23 +1651,56 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
 			ctrl->io_queues[HCTX_TYPE_POLL];
 }
 
+/*
+ * Track the number of queues assigned to each cpu using a global per-cpu
+ * counter and select the least used cpu from the mq_map. Our goal is to spread
+ * different controllers I/O threads across different cpu cores.
+ *
+ * Note that the accounting is not 100% perfect, but we don't need to be, we're
+ * simply putting our best effort to select the best candidate cpu core that we
+ * find at any given point.
+ */
 static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
 {
 	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
-	int qid = nvme_tcp_queue_id(queue);
-	int n = 0;
+	struct blk_mq_tag_set *set = &ctrl->tag_set;
+	int qid = nvme_tcp_queue_id(queue) - 1;
+	unsigned int *mq_map = NULL;
+	int cpu, min_queues = INT_MAX, io_cpu;
+
+	if (wq_unbound)
+		goto out;
 
 	if (nvme_tcp_default_queue(queue))
-		n = qid - 1;
+		mq_map = set->map[HCTX_TYPE_DEFAULT].mq_map;
 	else if (nvme_tcp_read_queue(queue))
-		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
+		mq_map = set->map[HCTX_TYPE_READ].mq_map;
 	else if (nvme_tcp_poll_queue(queue))
-		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
-				ctrl->io_queues[HCTX_TYPE_READ] - 1;
-	if (wq_unbound)
-		queue->io_cpu = WORK_CPU_UNBOUND;
-	else
-		queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+		mq_map = set->map[HCTX_TYPE_POLL].mq_map;
+
+	if (WARN_ON(!mq_map))
+		goto out;
+
+	/* Search for the least used cpu from the mq_map */
+	io_cpu = WORK_CPU_UNBOUND;
+	for_each_online_cpu(cpu) {
+		int num_queues = atomic_read(&nvme_tcp_cpu_queues[cpu]);
+
+		if (mq_map[cpu] != qid)
+			continue;
+		if (num_queues < min_queues) {
+			io_cpu = cpu;
+			min_queues = num_queues;
+		}
+	}
+	if (io_cpu != WORK_CPU_UNBOUND) {
+		queue->io_cpu = io_cpu;
+		atomic_inc(&nvme_tcp_cpu_queues[io_cpu]);
+		set_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags);
+	}
+out:
+	dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n",
+		qid, queue->io_cpu);
 }
 
 static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
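The loop added above is the core of the fix: every online cpu whose mq_map entry points at this queue's hardware context is a candidate, and the one with the fewest I/O queues already pinned to it wins. Below is a minimal user-space sketch of that selection; all names and values (cpu_queues, mq_map, pick_io_cpu, the 8-cpu layout) are hypothetical stand-ins for illustration, not the driver's actual structures.

#include <limits.h>
#include <stdio.h>

#define NR_CPUS		8
#define CPU_UNBOUND	(-1)

/* Toy stand-ins: per-cpu "queues already pinned here" counters and a
 * blk-mq style cpu -> hw queue map (example values only). */
static int cpu_queues[NR_CPUS];
static const int mq_map[NR_CPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };

/* Pick the least-loaded cpu among those mapped to hw queue 'qid'. */
static int pick_io_cpu(int qid)
{
	int cpu, io_cpu = CPU_UNBOUND, min_queues = INT_MAX;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (mq_map[cpu] != qid)
			continue;
		if (cpu_queues[cpu] < min_queues) {
			io_cpu = cpu;
			min_queues = cpu_queues[cpu];
		}
	}
	if (io_cpu != CPU_UNBOUND)
		cpu_queues[io_cpu]++;	/* mirrors the atomic_inc() */
	return io_cpu;
}

int main(void)
{
	/* Two controllers, each starting queues for hw queues 0..3. */
	for (int ctrl = 0; ctrl < 2; ctrl++)
		for (int qid = 0; qid < 4; qid++)
			printf("ctrl %d queue %d -> cpu %d\n",
			       ctrl, qid, pick_io_cpu(qid));
	return 0;
}

In this toy run the first controller lands on cpus 0/2/4/6 and the second on cpus 1/3/5/7, instead of both piling onto the same cores.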
@@ -1808,7 +1844,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 
 	queue->sock->sk->sk_allocation = GFP_ATOMIC;
 	queue->sock->sk->sk_use_task_frag = false;
-	nvme_tcp_set_queue_io_cpu(queue);
+	queue->io_cpu = WORK_CPU_UNBOUND;
 	queue->request = NULL;
 	queue->data_remaining = 0;
 	queue->ddgst_remaining = 0;
@@ -1930,6 +1966,9 @@ static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid)
 	if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
 		return;
 
+	if (test_and_clear_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags))
+		atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]);
+
 	mutex_lock(&queue->queue_lock);
 	if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		__nvme_tcp_stop_queue(queue);
@@ -1989,9 +2028,10 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 	nvme_tcp_init_recv_ctx(queue);
 	nvme_tcp_setup_sock_ops(queue);
 
-	if (idx)
+	if (idx) {
+		nvme_tcp_set_queue_io_cpu(queue);
 		ret = nvmf_connect_io_queue(nctrl, idx);
-	else
+	} else
 		ret = nvmf_connect_admin_queue(nctrl);
 
 	if (!ret) {
@@ -3012,6 +3052,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 static int __init nvme_tcp_init_module(void)
 {
 	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+	int cpu;
 
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
@@ -3029,6 +3070,9 @@ static int __init nvme_tcp_init_module(void)
 	if (!nvme_tcp_wq)
 		return -ENOMEM;
 
+	for_each_possible_cpu(cpu)
+		atomic_set(&nvme_tcp_cpu_queues[cpu], 0);
+
 	nvmf_register_transport(&nvme_tcp_transport);
 	return 0;
 }
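Taken together: the per-cpu counters are zeroed once at module load, nvme_tcp_set_queue_io_cpu() is now called from nvme_tcp_start_queue() (rather than at queue allocation, which just defaults io_cpu to WORK_CPU_UNBOUND) and increments the chosen cpu's counter, recording that with NVME_TCP_Q_IO_CPU_SET, and nvme_tcp_stop_queue_nowait() decrements it again. Controllers that connect later therefore see the load already placed by earlier ones. The removed cpumask_next_wrap() scheme derived io_cpu purely from the queue's index within its own controller, so multiple controllers with the same queue layout always picked the same cpus.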
