Skip to content

Commit 3c66d3d

Browse files
committed
intel_idle: Add C1 demotion on/off sysfs knob
JIRA: https://issues.redhat.com/browse/RHEL-110839

commit 6138f34
Author: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon Mar 17 15:55:39 2025 +0200

intel_idle: Add C1 demotion on/off sysfs knob

Add a sysfs knob to enable/disable C1 demotion for the following Intel platforms: Sapphire Rapids Xeon, Emerald Rapids Xeon, Granite Rapids Xeon, Sierra Forest Xeon, and Grand Ridge SoC.

This sysfs file shows up as "/sys/devices/system/cpu/cpuidle/intel_c1_demotion".

The C1 demotion feature involves the platform firmware demoting deep C-state requests from the OS (e.g., C6 requests) to C1. The idea is that firmware monitors the CPU wake-up rate, and if it is higher than a platform-specific threshold, the firmware demotes deep C-state requests to C1. For example, Linux requests C6, but firmware notices too many wake-ups per second, and it keeps the CPU in C1. When the CPU stays in C1 long enough, the platform promotes it back to C6.

The default value for C1 demotion is whatever is configured by BIOS.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Link: https://patch.msgid.link/20250317135541.1471754-2-dedekind1@gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Steve Best <sbest@redhat.com>
1 parent 87b9c8f commit 3c66d3d

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed

drivers/idle/intel_idle.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@
4848
#include <trace/events/power.h>
4949
#include <linux/sched.h>
5050
#include <linux/sched/smt.h>
51+
#include <linux/mutex.h>
5152
#include <linux/notifier.h>
5253
#include <linux/cpu.h>
5354
#include <linux/moduleparam.h>
55+
#include <linux/sysfs.h>
5456
#include <asm/cpuid.h>
5557
#include <asm/cpu_device_id.h>
5658
#include <asm/intel-family.h>
@@ -92,9 +94,15 @@ struct idle_cpu {
9294
unsigned long auto_demotion_disable_flags;
9395
bool byt_auto_demotion_disable_flag;
9496
bool disable_promotion_to_c1e;
97+
bool c1_demotion_supported;
9598
bool use_acpi;
9699
};
97100

101+
/*
 * Set to true by intel_idle_init() when icpu->c1_demotion_supported is set;
 * intel_idle_sysfs_init() creates the sysfs knob only when this is true.
 */
static bool c1_demotion_supported;
102+
/* Taken by intel_c1_demotion_store() around its on_each_cpu() MSR update. */
static DEFINE_MUTEX(c1_demotion_mutex);
103+
104+
/*
 * Device obtained from bus_get_dev_root(&cpu_subsys) in
 * intel_idle_sysfs_init(); its kobject is where the intel_c1_demotion
 * attribute is added to the "cpuidle" group. NULL when no device is held.
 */
static struct device *sysfs_root __initdata;
105+
98106
static const struct idle_cpu *icpu __initdata;
99107
static struct cpuidle_state *cpuidle_state_table __initdata;
100108

@@ -1542,18 +1550,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
15421550
/*
 * idle_cpu descriptor that uses the spr_cstates state table.
 * When selected as icpu, intel_idle_init() disables C1E promotion, marks C1
 * demotion control as supported (enabling the intel_c1_demotion sysfs knob)
 * and extracts ACPI _CST data.
 * NOTE(review): presumably the Sapphire Rapids Xeon entry - confirm against
 * the CPU ID match table.
 */
static const struct idle_cpu idle_cpu_spr __initconst = {
15431551
.state_table = spr_cstates,
15441552
.disable_promotion_to_c1e = true,
1553+
.c1_demotion_supported = true,
15451554
.use_acpi = true,
15461555
};
15471556

15481557
/*
 * idle_cpu descriptor that uses the gnr_cstates state table.
 * Requests C1E promotion to be disabled, flags C1 demotion control as
 * supported and permits use of ACPI _CST data (see intel_idle_init()).
 * NOTE(review): presumably the Granite Rapids Xeon entry - confirm against
 * the CPU ID match table.
 */
static const struct idle_cpu idle_cpu_gnr __initconst = {
15491558
.state_table = gnr_cstates,
15501559
.disable_promotion_to_c1e = true,
1560+
.c1_demotion_supported = true,
15511561
.use_acpi = true,
15521562
};
15531563

15541564
/*
 * idle_cpu descriptor that uses the gnrd_cstates state table.
 * Same flag set as its neighbours: C1E promotion disabled, C1 demotion
 * control supported, ACPI _CST data allowed (consumed by intel_idle_init()).
 * NOTE(review): which platform "gnrd" denotes is not shown here - confirm
 * against the CPU ID match table.
 */
static const struct idle_cpu idle_cpu_gnrd __initconst = {
15551565
.state_table = gnrd_cstates,
15561566
.disable_promotion_to_c1e = true,
1567+
.c1_demotion_supported = true,
15571568
.use_acpi = true,
15581569
};
15591570

@@ -1592,12 +1603,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
15921603
/*
 * idle_cpu descriptor that uses the grr_cstates state table.
 * Sets the flags intel_idle_init() reads to disable C1E promotion, to mark
 * C1 demotion control as supported and to extract ACPI _CST data.
 * NOTE(review): presumably the Grand Ridge SoC entry - confirm against the
 * CPU ID match table.
 */
static const struct idle_cpu idle_cpu_grr __initconst = {
15931604
.state_table = grr_cstates,
15941605
.disable_promotion_to_c1e = true,
1606+
.c1_demotion_supported = true,
15951607
.use_acpi = true,
15961608
};
15971609

15981610
/*
 * idle_cpu descriptor that uses the srf_cstates state table.
 * C1E promotion is disabled, C1 demotion control is flagged as supported
 * and ACPI _CST data may be used; all three flags are read in
 * intel_idle_init().
 * NOTE(review): presumably the Sierra Forest Xeon entry - confirm against
 * the CPU ID match table.
 */
static const struct idle_cpu idle_cpu_srf __initconst = {
15991611
.state_table = srf_cstates,
16001612
.disable_promotion_to_c1e = true,
1613+
.c1_demotion_supported = true,
16011614
.use_acpi = true,
16021615
};
16031616

@@ -2295,6 +2308,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
22952308
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
22962309
}
22972310

2311+
static void intel_c1_demotion_toggle(void *enable)
2312+
{
2313+
unsigned long long msr_val;
2314+
2315+
rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2316+
/*
2317+
* Enable/disable C1 undemotion along with C1 demotion, as this is the
2318+
* most sensible configuration in general.
2319+
*/
2320+
if (enable)
2321+
msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
2322+
else
2323+
msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
2324+
wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2325+
}
2326+
2327+
/*
 * sysfs write handler for the intel_c1_demotion attribute.
 *
 * Parses @buf as a boolean and applies the requested setting by running
 * intel_c1_demotion_toggle() via on_each_cpu(), serialized by
 * c1_demotion_mutex.
 *
 * Returns @count on success, or the kstrtobool() error code when @buf is not
 * a valid boolean.
 */
static ssize_t intel_c1_demotion_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	bool on;
	int ret = kstrtobool(buf, &on);

	if (ret)
		return ret;

	/* The boolean travels to the callback packed into the info pointer. */
	mutex_lock(&c1_demotion_mutex);
	on_each_cpu(intel_c1_demotion_toggle, (void *)on, 1);
	mutex_unlock(&c1_demotion_mutex);

	return count;
}
2345+
2346+
static ssize_t intel_c1_demotion_show(struct device *dev,
2347+
struct device_attribute *attr, char *buf)
2348+
{
2349+
unsigned long long msr_val;
2350+
2351+
/*
2352+
* Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2353+
* configuration would be a BIOS bug.
2354+
*/
2355+
rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2356+
return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
2357+
}
2358+
static DEVICE_ATTR_RW(intel_c1_demotion);
2359+
2360+
/*
 * Expose the intel_c1_demotion attribute in the "cpuidle" group of the CPU
 * subsystem root device.
 *
 * Does nothing and returns 0 when C1 demotion control is not supported or
 * when the CPU subsystem has no root device. On success the device reference
 * stays in sysfs_root for intel_idle_sysfs_uninit() to release.
 *
 * Returns 0 on success, or the sysfs_add_file_to_group() error code.
 */
static int __init intel_idle_sysfs_init(void)
{
	int err;

	if (!c1_demotion_supported)
		return 0;

	sysfs_root = bus_get_dev_root(&cpu_subsys);
	if (!sysfs_root)
		return 0;

	err = sysfs_add_file_to_group(&sysfs_root->kobj,
				      &dev_attr_intel_c1_demotion.attr,
				      "cpuidle");
	if (err) {
		put_device(sysfs_root);
		/*
		 * The caller only warns on this error and carries on, so
		 * intel_idle_sysfs_uninit() may still run from its failure
		 * path. Clear the pointer so the reference just dropped is
		 * not dropped a second time there.
		 */
		sysfs_root = NULL;
		return err;
	}

	return 0;
}
2381+
2382+
/*
 * Undo intel_idle_sysfs_init(): remove the intel_c1_demotion attribute from
 * the "cpuidle" group and drop the device reference held in sysfs_root.
 * Does nothing when sysfs_root is NULL.
 */
static void __init intel_idle_sysfs_uninit(void)
{
	if (sysfs_root) {
		sysfs_remove_file_from_group(&sysfs_root->kobj,
					     &dev_attr_intel_c1_demotion.attr,
					     "cpuidle");
		put_device(sysfs_root);
	}
}
2392+
22982393
static int __init intel_idle_init(void)
22992394
{
23002395
const struct x86_cpu_id *id;
@@ -2344,6 +2439,8 @@ static int __init intel_idle_init(void)
23442439
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
23452440
if (icpu->disable_promotion_to_c1e)
23462441
c1e_promotion = C1E_PROMOTION_DISABLE;
2442+
if (icpu->c1_demotion_supported)
2443+
c1_demotion_supported = true;
23472444
if (icpu->use_acpi || force_use_acpi)
23482445
intel_idle_acpi_cst_extract();
23492446
} else if (!intel_idle_acpi_cst_extract()) {
@@ -2357,6 +2454,10 @@ static int __init intel_idle_init(void)
23572454
if (!intel_idle_cpuidle_devices)
23582455
return -ENOMEM;
23592456

2457+
retval = intel_idle_sysfs_init();
2458+
if (retval)
2459+
pr_warn("failed to initialized sysfs");
2460+
23602461
intel_idle_cpuidle_driver_init(&intel_idle_driver);
23612462

23622463
retval = cpuidle_register_driver(&intel_idle_driver);
@@ -2381,6 +2482,7 @@ static int __init intel_idle_init(void)
23812482
intel_idle_cpuidle_devices_uninit();
23822483
cpuidle_unregister_driver(&intel_idle_driver);
23832484
init_driver_fail:
2485+
intel_idle_sysfs_uninit();
23842486
free_percpu(intel_idle_cpuidle_devices);
23852487
return retval;
23862488

0 commit comments

Comments
 (0)