// SPDX-License-Identifier: GPL-2.0-or-later
/*
* POWERNV cpufreq driver for the IBM POWER processors
*
* (C) Copyright IBM 2014
*
* Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
*/
#define pr_fmt(fmt) "powernv-cpufreq: " fmt
#include <linux/kernel.h>
#include <linux/sysfs.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/cpufreq.h>
#include <linux/smp.h>
#include <linux/of.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/hashtable.h>
#include <trace/events/power.h>
#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/reg.h>
#include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
#include <asm/opal.h>
#include <linux/timer.h>
#define POWERNV_MAX_PSTATES_ORDER 8
#define POWERNV_MAX_PSTATES (1UL << (POWERNV_MAX_PSTATES_ORDER))
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
#define MAX_PSTATE_SHIFT 32
#define LPSTATE_SHIFT 48
#define GPSTATE_SHIFT 56
#define MAX_NR_CHIPS 32
#define MAX_RAMP_DOWN_TIME 5120
/*
* On an idle system we want the global pstate to ramp-down from max value to
* min over a span of ~5 secs. Also we want it to initially ramp-down slowly and
* then ramp-down rapidly later on.
*
* This gives a percentage rampdown for time elapsed in milliseconds.
* ramp_down_percentage = ((ms * ms) >> 18)
* ~= 3.8 * (sec * sec)
*
* At 0 ms ramp_down_percent = 0
* At 5120 ms ramp_down_percent = 100
*/
#define ramp_down_percent(time) ((time * time) >> 18)
/* Interval after which the timer is queued to bring down global pstate */
#define GPSTATE_TIMER_INTERVAL 2000
/**
* struct global_pstate_info - Per policy data structure to maintain history of
* global pstates
* @highest_lpstate_idx: The local pstate index from which we are
* ramping down
* @elapsed_time: Time in ms spent in ramping down from
* highest_lpstate_idx
* @last_sampled_time: Time from boot in ms when global pstates were
* last set
* @last_lpstate_idx: Last set value of local pstate and global
* @last_gpstate_idx: pstate in terms of cpufreq table index
* @timer: Is used for ramping down if cpu goes idle for
* a long time with global pstate held high
* @gpstate_lock: A spinlock to maintain synchronization between
* routines called by the timer handler and
* governer's target_index calls
* @policy: Associated CPUFreq policy
*/
struct global_pstate_info {
int highest_lpstate_idx;
unsigned int elapsed_time;
unsigned int last_sampled_time;
int last_lpstate_idx;
int last_gpstate_idx;
spinlock_t gpstate_lock;
struct timer_list timer;
struct cpufreq_policy *policy;
};
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER);
/**
* struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap
* indexed by a function of pstate id.
*
* @pstate_id: pstate id for this entry.
*
* @cpufreq_table_idx: Index into the powernv_freqs
* cpufreq_frequency_table for frequency
* corresponding to pstate_id.
*
* @hentry: hlist_node that hooks this entry into the pstate_revmap
* hashtable
*/
struct pstate_idx_revmap_data {
u8 pstate_id;
unsigned int cpufreq_table_idx;
struct hlist_node hentry;
};
static bool rebooting, throttled, occ_reset;
static const char * const throttle_reason[] = {
"No throttling",
"Power Cap",
"Processor Over Temperature",
"Power Supply Failure",
"Over Current",
"OCC Reset"
};
enum throttle_reason_type {
NO_THROTTLE = 0,
POWERCAP,
CPU_OVERTEMP,
POWER_SUPPLY_FAILURE,
OVERCURRENT,
OCC_RESET_THROTTLE,
OCC_MAX_REASON
};
static struct chip {
unsigned int id;
bool throttled;
bool restore;
u8 throttle_reason;
cpumask_t mask;
struct work_struct throttle;
int throttle_turbo;
int throttle_sub_turbo;
int reason[OCC_MAX_REASON];
} *chips;
static int nr_chips;
static DEFINE_PER_CPU(struct chip *, chip_info);
/*
* Note:
* The set of pstates consists of contiguous integers.
* powernv_pstate_info stores the index of the frequency table for
* max, min and nominal frequencies. It also stores number of
* available frequencies.
*
* powernv_pstate_info.nominal indicates the index to the highest
* non-turbo frequency.
*/
static struct powernv_pstate_info {
unsigned int min;
unsigned int max;
unsigned int nominal;
unsigned int nr_pstates;
bool wof_enabled;
} powernv_pstate_info;
static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift)
{
return ((pmsr_val >> shift) & 0xFF);
}
#define extract_local_pstate(x) extract_pstate(x, LPSTATE_SHIFT)
#define extract_global_pstate(x) extract_pstate(x, GPSTATE_SHIFT)
#define extract_max_pstate(x) extract_pstate(x, MAX_PSTATE_SHIFT)
/* Use following functions for conversions between pstate_id and index */
/*
* idx_to_pstate : Returns the pstate id corresponding to the
* frequency in the cpufreq frequency table
* powernv_freqs indexed by @i.
*
* If @i is out of bound, this will return the pstate
* corresponding to the nominal frequency.
*/
static inline u8 idx_to_pstate(unsigned int i)
{
if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
pr_warn_once("idx_to_pstate: index %u is out of bound\n", i);
return powernv_freqs[powernv_pstate_info.nominal].driver_data;
}
return powernv_freqs[i].driver_data;
}
/*
* pstate_to_idx : Returns the index in the cpufreq frequencytable
* powernv_freqs for the frequency whose corresponding
* pstate id is @pstate.
*
* If no frequency corresponding to @pstate is found,
* this will return the index of the nominal
* frequency.
*/
static unsigned int pstate_to_idx(u8 pstate)
{
unsigned int key = pstate % POWERNV_MAX_PSTATES;
struct pstate_idx_revmap_data *revmap_data;
hash_for_each_possible(pstate_revmap, revmap_data