// SPDX-License-Identifier: GPL-2.0-only
/*
* Common code for Intel Running Average Power Limit (RAPL) support.
* Copyright (c) 2019, Intel Corporation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/powercap.h>
#include <linux/suspend.h>
#include <linux/intel_rapl.h>
#include <linux/processor.h>
#include <linux/platform_device.h>
#include <asm/iosf_mbi.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
#define POWER_LIMIT1_MASK 0x7FFF
#define POWER_LIMIT1_ENABLE BIT(15)
#define POWER_LIMIT1_CLAMP BIT(16)
#define POWER_LIMIT2_MASK (0x7FFFULL<<32)
#define POWER_LIMIT2_ENABLE BIT_ULL(47)
#define POWER_LIMIT2_CLAMP BIT_ULL(48)
#define POWER_HIGH_LOCK BIT_ULL(63)
#define POWER_LOW_LOCK BIT(31)
#define POWER_LIMIT4_MASK 0x1FFF
#define TIME_WINDOW1_MASK (0x7FULL<<17)
#define TIME_WINDOW2_MASK (0x7FULL<<49)
#define POWER_UNIT_OFFSET 0
#define POWER_UNIT_MASK 0x0F
#define ENERGY_UNIT_OFFSET 0x08
#define ENERGY_UNIT_MASK 0x1F00
#define TIME_UNIT_OFFSET 0x10
#define TIME_UNIT_MASK 0xF0000
#define POWER_INFO_MAX_MASK (0x7fffULL<<32)
#define POWER_INFO_MIN_MASK (0x7fffULL<<16)
#define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
#define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
#define PP_POLICY_MASK 0x1F
/*
* SPR has different layout for Psys Domain PowerLimit registers.
* There are 17 bits of PL1 and PL2 instead of 15 bits.
* The Enable bits and TimeWindow bits are also shifted as a result.
*/
#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2)
#define TIME_WINDOW_MAX_MSEC 40000
#define TIME_WINDOW_MIN_MSEC 250
#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
enum unit_type {
ARBITRARY_UNIT, /* no translation */
POWER_UNIT,
ENERGY_UNIT,
TIME_UNIT,
};
/* per domain data, some are optional */
#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
#define DOMAIN_STATE_INACTIVE BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
#define DOMAIN_STATE_BIOS_LOCKED BIT(2)
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
static const char pl4_name[] = "peak_power";
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
struct rapl_defaults {
u8 floor_freq_reg_addr;
int (*check_unit)(struct rapl_package *rp, int cpu);
void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
bool to_raw);
unsigned int dram_domain_energy_unit;
unsigned int psys_domain_energy_unit;
bool spr_psys_bits;
};
static struct rapl_defaults *rapl_defaults;
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
#define PACKAGE_PLN_INT_SAVED BIT(0)
#define MAX_PRIM_NAME (32)
/* per domain data. used to describe individual knobs such that access function
* can be consolidated into one instead of many inline functions.
*/
struct rapl_primitive_info {
const char *name;
u64 mask;
int shift;
enum rapl_domain_reg_id id;
enum unit_type unit;
u32 flag;
};
#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
.name = #p, \
.mask = m, \
.shift = s, \
.id = i, \
.unit = u, \
.flag = f \
}
static void rapl_init_domains(struct rapl_package *rp);
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
bool xlate, u64 *data);
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
static u64 rapl_unit_xlate(struct rapl_domain *rd,
enum unit_type type, u64 value, int to_raw);
static void package_power_limit_irq_save(struct rapl_package *rp);
static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
static const char *const rapl_domain_names[] = {
"package",
"core",
"uncore",
"dram",
"psys",
};
static int get_energy_counter(struct powercap_zone *power_zone,
u64 *energy_raw)
{
struct rapl_domain *rd;
u64 energy_now;
/* prevent CPU hotplug, make sure the RAPL domain does not go
* away while reading the counter.
*/
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
*energy_raw = energy_now;
cpus_read_unlock();
return 0;
}
cpus_read_unlock();
return -EIO;
}
static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
*energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
return 0;
}
static int release_zone(struct powercap_zone *power_zone)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
struct rapl_package *rp = rd->rp;
/* package zone is the last zone of a package, we can free
* memory here since all children has been unregistered.
*/
if (rd->id == RAPL_DOMAIN_PACKAGE) {
kfree(rd);
rp->domains = NULL;
}
return 0;
}
static int find_nr_power_limit(struct rapl_domain *rd)
{
int i, nr_pl = 0;
for (i = 0; i < NR_POWER_LIMITS; i++) {
if (rd->rpl[i].name)
nr_pl++;
}
return nr_pl;
}
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
cpus_read_unlock();
return 0;
}
static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
u64 val;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
|