aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/thermal
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-03-13 11:40:06 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-03-13 11:40:06 -0700
commit 07abb19a9b201c11e4367e8a428be7235b6dbd0d (patch)
tree 8e0a08bce75f5e91a5b40c9f74ecd7f722295160 /drivers/thermal
parent a070a08d006d142e2ae0bf73843dc90c2b42b02f (diff)
parent 866b554c2d3e067751cc2cbad9ed281db2d47143 (diff)
download linux-07abb19a9b201c11e4367e8a428be7235b6dbd0d.tar.gz
Merge tag 'pm-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "From the functional perspective, the most significant change here is the addition of support for Energy Models that can be updated dynamically at run time. There is also the addition of LZ4 compression support for hibernation, the new preferred core support in amd-pstate, new platforms support in the Intel RAPL driver, new model-specific EPP handling in intel_pstate and more. Apart from that, the cpufreq default transition delay is reduced from 10 ms to 2 ms (along with some related adjustments), the system suspend statistics code undergoes a significant rework and there is a usual bunch of fixes and code cleanups all over. Specifics: - Allow the Energy Model to be updated dynamically (Lukasz Luba) - Add support for LZ4 compression algorithm to the hibernation image creation and loading code (Nikhil V) - Fix and clean up system suspend statistics collection (Rafael Wysocki) - Simplify device suspend and resume handling in the power management core code (Rafael Wysocki) - Fix PCI hibernation support description (Yiwei Lin) - Make hibernation take set_memory_ro() return values into account as appropriate (Christophe Leroy) - Set mem_sleep_current during kernel command line setup to avoid an ordering issue with handling it (Maulik Shah) - Fix wake IRQs handling when pm_runtime_force_suspend() is used as a driver's system suspend callback (Qingliang Li) - Simplify pm_runtime_get_if_active() usage and add a replacement for pm_runtime_put_autosuspend() (Sakari Ailus) - Add a tracepoint for runtime_status changes tracking (Vilas Bhat) - Fix section title markdown in the runtime PM documentation (Yiwei Lin) - Enable preferred core support in the amd-pstate cpufreq driver (Meng Li) - Fix min_perf assignment in amd_pstate_adjust_perf() and make the min/max limit perf values in amd-pstate always stay within the (highest perf, lowest perf) range (Tor Vic, Meng Li) - Allow intel_pstate to assign model-specific values to 
strings used in the EPP sysfs interface and make it do so on Meteor Lake (Srinivas Pandruvada) - Drop long-unused cpudata::prev_cummulative_iowait from the intel_pstate cpufreq driver (Jiri Slaby) - Prevent scaling_cur_freq from exceeding scaling_max_freq when the latter is an inefficient frequency (Shivnandan Kumar) - Change default transition delay in cpufreq to 2ms (Qais Yousef) - Remove references to 10ms minimum sampling rate from comments in the cpufreq code (Pierre Gondois) - Honour transition_latency over transition_delay_us in cpufreq (Qais Yousef) - Stop unregistering cpufreq cooling on CPU hot-remove (Viresh Kumar) - General enhancements / cleanups to ARM cpufreq drivers (tianyu2, Nícolas F. R. A. Prado, Erick Archer, Arnd Bergmann, Anastasia Belova) - Update cpufreq-dt-platdev to block/approve devices (Richard Acayan) - Make the SCMI cpufreq driver get a transition delay value from firmware (Pierre Gondois) - Prevent the haltpoll cpuidle governor from shrinking guest poll_limit_ns below grow_start (Parshuram Sangle) - Avoid potential overflow in integer multiplication when computing cpuidle state parameters (C Cheng) - Adjust MWAIT hint target C-state computation in the ACPI cpuidle driver and in intel_idle to return a correct value for C0 (He Rongguang) - Address multiple issues in the TPMI RAPL driver and add support for new platforms (Lunar Lake-M, Arrow Lake) to Intel RAPL (Zhang Rui) - Fix freq_qos_add_request() return value check in dtpm_cpu (Daniel Lezcano) - Fix kernel-doc for dtpm_create_hierarchy() (Yang Li) - Fix file leak in get_pkg_num() in x86_energy_perf_policy (Samasth Norway Ananda) - Fix cpupower-frequency-info.1 man page typo (Jan Kratochvil) - Fix a couple of warnings in the OPP core code related to W=1 builds (Viresh Kumar) - Move dev_pm_opp_{init|free}_cpufreq_table() to pm_opp.h (Viresh Kumar) - Extend dev_pm_opp_data with turbo support (Sibi Sankar) - dt-bindings: drop maxItems from inner items (David Heidelberg)" * tag 
'pm-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (95 commits) dt-bindings: opp: drop maxItems from inner items OPP: debugfs: Fix warning around icc_get_name() OPP: debugfs: Fix warning with W=1 builds cpufreq: Move dev_pm_opp_{init|free}_cpufreq_table() to pm_opp.h OPP: Extend dev_pm_opp_data with turbo support Fix cpupower-frequency-info.1 man page typo cpufreq: scmi: Set transition_delay_us firmware: arm_scmi: Populate fast channel rate_limit firmware: arm_scmi: Populate perf commands rate_limit cpuidle: ACPI/intel: fix MWAIT hint target C-state computation PM: sleep: wakeirq: fix wake irq warning in system suspend powercap: dtpm: Fix kernel-doc for dtpm_create_hierarchy() function cpufreq: Don't unregister cpufreq cooling on CPU hotplug PM: suspend: Set mem_sleep_current during kernel command line setup cpufreq: Honour transition_latency over transition_delay_us cpufreq: Limit resolving a frequency to policy min/max Documentation: PM: Fix runtime_pm.rst markdown syntax cpufreq: amd-pstate: adjust min/max limit perf cpufreq: Remove references to 10ms min sampling rate cpufreq: intel_pstate: Update default EPPs for Meteor Lake ...
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- drivers/thermal/cpufreq_cooling.c 45
-rw-r--r-- drivers/thermal/devfreq_cooling.c 49
-rw-r--r-- drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c 8
3 files changed, 81 insertions, 21 deletions
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index e2cc7bd308620..9d1b1459700df 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -91,12 +91,16 @@ struct cpufreq_cooling_device {
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
unsigned int freq)
{
+ struct em_perf_state *table;
int i;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(cpufreq_cdev->em);
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
- if (freq > cpufreq_cdev->em->table[i].frequency)
+ if (freq > table[i].frequency)
break;
}
+ rcu_read_unlock();
return cpufreq_cdev->max_level - i - 1;
}
@@ -104,16 +108,20 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
u32 freq)
{
+ struct em_perf_state *table;
unsigned long power_mw;
int i;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(cpufreq_cdev->em);
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
- if (freq > cpufreq_cdev->em->table[i].frequency)
+ if (freq > table[i].frequency)
break;
}
- power_mw = cpufreq_cdev->em->table[i + 1].power;
+ power_mw = table[i + 1].power;
power_mw /= MICROWATT_PER_MILLIWATT;
+ rcu_read_unlock();
return power_mw;
}
@@ -121,18 +129,24 @@ static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
u32 power)
{
+ struct em_perf_state *table;
unsigned long em_power_mw;
+ u32 freq;
int i;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(cpufreq_cdev->em);
for (i = cpufreq_cdev->max_level; i > 0; i--) {
/* Convert EM power to milli-Watts to make safe comparison */
- em_power_mw = cpufreq_cdev->em->table[i].power;
+ em_power_mw = table[i].power;
em_power_mw /= MICROWATT_PER_MILLIWATT;
if (power >= em_power_mw)
break;
}
+ freq = table[i].frequency;
+ rcu_read_unlock();
- return cpufreq_cdev->em->table[i].frequency;
+ return freq;
}
/**
@@ -262,8 +276,9 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
unsigned long state, u32 *power)
{
- unsigned int freq, num_cpus, idx;
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+ unsigned int freq, num_cpus, idx;
+ struct em_perf_state *table;
/* Request state should be less than max_level */
if (state > cpufreq_cdev->max_level)
@@ -272,7 +287,12 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
idx = cpufreq_cdev->max_level - state;
- freq = cpufreq_cdev->em->table[idx].frequency;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd(cpufreq_cdev->em);
+ freq = table[idx].frequency;
+ rcu_read_unlock();
+
*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
return 0;
@@ -378,8 +398,17 @@ static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
/* Use the Energy Model table if available */
if (cpufreq_cdev->em) {
+ struct em_perf_state *table;
+ unsigned int freq;
+
idx = cpufreq_cdev->max_level - state;
- return cpufreq_cdev->em->table[idx].frequency;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd(cpufreq_cdev->em);
+ freq = table[idx].frequency;
+ rcu_read_unlock();
+
+ return freq;
}
#endif
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 262e62ab6cf2f..50dec24e967a0 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -87,6 +87,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
struct devfreq_cooling_device *dfc = cdev->devdata;
struct devfreq *df = dfc->devfreq;
struct device *dev = df->dev.parent;
+ struct em_perf_state *table;
unsigned long freq;
int perf_idx;
@@ -100,7 +101,11 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
if (dfc->em_pd) {
perf_idx = dfc->max_state - state;
- freq = dfc->em_pd->table[perf_idx].frequency * 1000;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd(dfc->em_pd);
+ freq = table[perf_idx].frequency * 1000;
+ rcu_read_unlock();
} else {
freq = dfc->freq_table[state];
}
@@ -123,14 +128,21 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
*/
static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq)
{
- int i;
+ struct em_perf_state *table;
+ int i, idx = -EINVAL;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(em_pd);
for (i = 0; i < em_pd->nr_perf_states; i++) {
- if (em_pd->table[i].frequency == freq)
- return i;
+ if (table[i].frequency != freq)
+ continue;
+
+ idx = i;
+ break;
}
+ rcu_read_unlock();
- return -EINVAL;
+ return idx;
}
static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
@@ -181,6 +193,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
struct devfreq_cooling_device *dfc = cdev->devdata;
struct devfreq *df = dfc->devfreq;
struct devfreq_dev_status status;
+ struct em_perf_state *table;
unsigned long state;
unsigned long freq;
unsigned long voltage;
@@ -204,7 +217,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
state = dfc->capped_state;
/* Convert EM power into milli-Watts first */
- dfc->res_util = dfc->em_pd->table[state].power;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(dfc->em_pd);
+ dfc->res_util = table[state].power;
+ rcu_read_unlock();
+
dfc->res_util /= MICROWATT_PER_MILLIWATT;
dfc->res_util *= SCALE_ERROR_MITIGATION;
@@ -225,7 +242,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
_normalize_load(&status);
/* Convert EM power into milli-Watts first */
- *power = dfc->em_pd->table[perf_idx].power;
+ rcu_read_lock();
+ table = em_perf_state_from_pd(dfc->em_pd);
+ *power = table[perf_idx].power;
+ rcu_read_unlock();
+
*power /= MICROWATT_PER_MILLIWATT;
/* Scale power for utilization */
*power *= status.busy_time;
@@ -245,13 +266,19 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
unsigned long state, u32 *power)
{
struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct em_perf_state *table;
int perf_idx;
if (state > dfc->max_state)
return -EINVAL;
perf_idx = dfc->max_state - state;
- *power = dfc->em_pd->table[perf_idx].power;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd(dfc->em_pd);
+ *power = table[perf_idx].power;
+ rcu_read_unlock();
+
*power /= MICROWATT_PER_MILLIWATT;
return 0;
@@ -264,6 +291,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
struct devfreq *df = dfc->devfreq;
struct devfreq_dev_status status;
unsigned long freq, em_power_mw;
+ struct em_perf_state *table;
s32 est_power;
int i;
@@ -288,13 +316,16 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
* Find the first cooling state that is within the power
* budget. The EM power table is sorted ascending.
*/
+ rcu_read_lock();
+ table = em_perf_state_from_pd(dfc->em_pd);
for (i = dfc->max_state; i > 0; i--) {
/* Convert EM power to milli-Watts to make safe comparison */
- em_power_mw = dfc->em_pd->table[i].power;
+ em_power_mw = table[i].power;
em_power_mw /= MICROWATT_PER_MILLIWATT;
if (est_power >= em_power_mw)
break;
}
+ rcu_read_unlock();
*state = dfc->max_state - i;
dfc->capped_state = *state;
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index 2f00fc3bf274a..e964a9375722a 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu)
if (topology_physical_package_id(cpu))
return 0;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, &rapl_mmio_priv, true);
+ rp = rapl_add_package_cpuslocked(cpu, &rapl_mmio_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -42,14 +42,14 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true);
if (!rp)
return 0;
cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
if (lead_cpu >= nr_cpu_ids)
- rapl_remove_package(rp);
+ rapl_remove_package_cpuslocked(rp);
else if (rp->lead_cpu == cpu)
rp->lead_cpu = lead_cpu;
return 0;