/*
* Intel Running Average Power Limit (RAPL) Driver
* Copyright (c) 2013, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/powercap.h>
#include <asm/iosf_mbi.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
#define POWER_LIMIT1_MASK 0x7FFF
#define POWER_LIMIT1_ENABLE BIT(15)
#define POWER_LIMIT1_CLAMP BIT(16)
#define POWER_LIMIT2_MASK (0x7FFFULL<<32)
#define POWER_LIMIT2_ENABLE BIT_ULL(47)
#define POWER_LIMIT2_CLAMP BIT_ULL(48)
#define POWER_PACKAGE_LOCK BIT_ULL(63)
#define POWER_PP_LOCK BIT(31)
#define TIME_WINDOW1_MASK (0x7FULL<<17)
#define TIME_WINDOW2_MASK (0x7FULL<<49)
#define POWER_UNIT_OFFSET 0
#define POWER_UNIT_MASK 0x0F
#define ENERGY_UNIT_OFFSET 0x08
#define ENERGY_UNIT_MASK 0x1F00
#define TIME_UNIT_OFFSET 0x10
#define TIME_UNIT_MASK 0xF0000
#define POWER_INFO_MAX_MASK (0x7fffULL<<32)
#define POWER_INFO_MIN_MASK (0x7fffULL<<16)
#define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
#define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
#define PP_POLICY_MASK 0x1F
/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2)
#define TIME_WINDOW_MAX_MSEC 40000
#define TIME_WINDOW_MIN_MSEC 250
#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
enum unit_type {
ARBITRARY_UNIT, /* no translation */
POWER_UNIT,
ENERGY_UNIT,
TIME_UNIT,
};
enum rapl_domain_type {
RAPL_DOMAIN_PACKAGE, /* entire package/socket */
RAPL_DOMAIN_PP0, /* core power plane */
RAPL_DOMAIN_PP1, /* graphics uncore */
RAPL_DOMAIN_DRAM,/* DRAM control_type */
RAPL_DOMAIN_MAX,
};
enum rapl_domain_msr_id {
RAPL_DOMAIN_MSR_LIMIT,
RAPL_DOMAIN_MSR_STATUS,
RAPL_DOMAIN_MSR_PERF,
RAPL_DOMAIN_MSR_POLICY,
RAPL_DOMAIN_MSR_INFO,
RAPL_DOMAIN_MSR_MAX,
};
/* per domain data, some are optional */
enum rapl_primitives {
ENERGY_COUNTER,
POWER_LIMIT1,
POWER_LIMIT2,
FW_LOCK,
PL1_ENABLE, /* power limit 1, aka long term */
PL1_CLAMP, /* allow frequency to go below OS request */
PL2_ENABLE, /* power limit 2, aka short term, instantaneous */
PL2_CLAMP,
TIME_WINDOW1, /* long term */
TIME_WINDOW2, /* short term */
THERMAL_SPEC_POWER,
MAX_POWER,
MIN_POWER,
MAX_TIME_WINDOW,
THROTTLED_TIME,
PRIORITY_LEVEL,
/* below are not raw primitive data */
AVERAGE_POWER,
NR_RAPL_PRIMITIVES,
};
#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
/* Can be expanded to include events, etc.*/
struct rapl_domain_data {
u64 primitives[NR_RAPL_PRIMITIVES];
unsigned long timestamp;
};
#define DOMAIN_STATE_INACTIVE BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
#define DOMAIN_STATE_BIOS_LOCKED BIT(2)
#define NR_POWER_LIMITS (2)
struct rapl_power_limit {
struct powercap_zone_constraint *constraint;
int prim_id; /* primitive ID used to enable */
struct rapl_domain *domain;
const char *name;
};
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
struct rapl_domain {
const char *name;
enum rapl_domain_type id;
int msrs[RAPL_DOMAIN_MSR_MAX];
struct powercap_zone power_zone;
struct rapl_domain_data rdd;
struct rapl_power_limit rpl[NR_POWER_LIMITS];
u64 attr_map; /* track capabilities */
unsigned int state;
unsigned int domain_energy_unit;
int package_id;
};
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
/* Each physical package contains multiple domains, these are the common
* data across RAPL domains within a package.
*/
struct rapl_package {
unsigned int id; /* physical package/socket id */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit;
unsigned int energy_unit;
unsigned int time_unit;
struct rapl_domain *domains; /* array of domains, sized at runtime */
struct powercap_zone *power_zone; /* keep track of parent zone */
int nr_cpus; /* active cpus on the package, topology info is lost during
* cpu hotplug. so we have to track ourselves.
*/
unsigned long power_limit_irq; /* keep track of package power limit
* notify interrupt enable status.
*/
struct list_head plist;
};
struct rapl_defaults {
u8 floor_freq_reg_addr;
int (*check_unit)(struct rapl_package *rp, int cpu);
void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
bool to_raw);
unsigned int dram_domain_energy_unit;
};
static struct rapl_defaults *rapl_defaults;
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
#define PACKAGE_PLN_INT_SAVED BIT(0)
#define MAX_PRIM_NAME (32)
/* per domain data. used to describe individual knobs such that access function
* can be consolidated into one instead of many inline functions.
*/
struct rapl_primitive_info {
const char *name;
u64 mask;
int shift;
enum rapl_domain_msr_id id;
enum unit_type unit;
u32 flag;
};
#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
.name = #p, \
.mask = m, \
.shift = s, \
.id = i, \
.unit = u, \
.flag = f \
}
static void rapl_ini
|