// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>
#include <linux/static_call.h>
#include <asm/cpuid/api.h>
#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/i8259.h>
#include <asm/msr.h>
#include <asm/topology.h>
#include <asm/uv/uv.h>
#include <asm/sev.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);
#define KHZ 1000
/*
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz;
static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc);
int tsc_clocksource_reliable;
static int __read_mostly tsc_force_recalibrate;
static struct clocksource_base art_base_clk = {
.id = CSID_X86_ART,
};
static bool have_art;
struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
seqcount_latch_t seq; /* 32 + 4 = 36 */
}; /* fits one cacheline */
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
static int __init tsc_early_khz_setup(char *buf)
{
return kstrtouint(buf, 0, &tsc_early_khz);
}
early_param("tsc_early_khz", tsc_early_khz_setup);
__always_inline void __cyc2ns_read(struct cyc2ns_data *data)
{
int seq, idx;
do {
seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
preempt_disable_notrace();
__cyc2ns_read(data);
}
__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
/*
* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
* ns = cycles * (10^9 / (cpu_khz * 10^3))
* ns = cycles * (10^6 / cpu_khz)
*
* Then we use scaling math (suggested by george@mvista.com) to get:
* ns = cycles * (10^6 * SC / cpu_khz) / SC
* ns = cycles * cyc2ns_scale / SC
*
* And since SC is a constant power of two, we can convert the div
* into a shift. The larger SC is, the more accurate the conversion, but
* cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
* (64-bit result) can be used.
*
* We can use khz divisor instead of mhz to keep a better precision.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
__cyc2ns_read(&data);
ns = data.cyc2ns_offset;
ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
return ns;
}
static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
unsigned long long ns;
preempt_disable_notrace();
ns = __cycles_2_ns(cyc);
preempt_enable_notrace();
return ns;
}
static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
unsigned long long ns_now;
struct cyc2ns_data data;
struct cyc2ns *c2n;
ns_now = cycles_2_ns(tsc_now);
/*
* Compute a new multiplier as per the above comment and ensure our
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
NSEC_PER_MSEC, 0);
/*
* cyc2ns_shift is exported via arch_perf_update_userpage() where it is
* not expected to be greater than 31 due to the original published
* conversion algorithm shifting a 32-bit value (now specifies a 64-bit
* value) - refer perf_event_mmap_page documentation in perf_event.h.
*/
if (data.cyc2ns_shift == 32) {
data.cyc2ns_shift = 31;
data.cyc2ns_mul >>= 1;
}
data.cyc2ns_offset = ns_now -
mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
c2n = per_cpu_ptr(&cyc2ns, cpu);
write_seqcount_latch_begin(&c2n->seq);
c2n->data[0] = data;
write_seqcount_latch(&c2n->seq);
c2n->data[1] = data;
write_seqcount_latch_end(&c2n->seq);
}
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned lo
|