// SPDX-License-Identifier: GPL-2.0
/*
* Implement CPU time clocks for the POSIX clock interface.
*/
#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
#include <linux/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <linux/compat.h>
#include <linux/sched/deadline.h>
#include "posix-timers.h"
static void posix_cpu_timer_rearm(struct k_itimer *timer);
/*
 * Initialize the process wide posix CPU timer state in @pct.
 *
 * @pct:       container to initialize via posix_cputimers_init()
 * @cpu_limit: CPU time limit in seconds, or RLIM_INFINITY for no limit
 *
 * When a finite limit is given, the CPUCLOCK_PROF expiry cache is primed
 * with the limit converted to nanoseconds and the timers are flagged active.
 */
void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
{
	posix_cputimers_init(pct);

	/* No limit configured: leave the freshly initialized state alone. */
	if (cpu_limit == RLIM_INFINITY)
		return;

	pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
	pct->timers_active = true;
}
/*
* Called after updating RLIMIT_CPU to run cpu timer and update
* tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
* necessary. Needs siglock protection since other code may update the
* expiration cache as well.
*/
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
u64 nsecs = rlim_new * NSEC_PER_SEC;
spin_lock_irq(&task->sighand->siglock);
set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
spin_unlock_irq(&task->sighand->siglock);
}
/*
* Functions for validating access to tasks.
*/
/*
 * Resolve the struct pid a CPU clock id refers to.
 *
 * @clock:   encoded CPU clock id
 * @gettime: true when called on behalf of a clock_gettime() style lookup
 *
 * Returns the matching struct pid, or NULL when the clock type is out of
 * range, the encoded PID cannot be found, or the PID does not satisfy the
 * thread/process requirements checked below.
 */
static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
{
	const pid_t upid = CPUCLOCK_PID(clock);
	const bool thread = !!CPUCLOCK_PERTHREAD(clock);
	struct task_struct *tsk;
	struct pid *pid;

	if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
		return NULL;

	/*
	 * An encoded PID of 0 targets current, or the process to which
	 * current belongs.
	 */
	if (!upid) {
		if (thread)
			return task_pid(current);
		return task_tgid(current);
	}

	pid = find_vpid(upid);
	if (!pid)
		return NULL;

	/* Per-thread clocks are only valid within current's thread group. */
	if (thread) {
		tsk = pid_task(pid, PIDTYPE_PID);
		if (!tsk || !same_thread_group(tsk, current))
			return NULL;
		return pid;
	}

	/*
	 * For clock_gettime(PROCESS) allow finding the process by the pid
	 * of the current task. The code needs the tgid of the process so
	 * that pid_task(pid, PIDTYPE_TGID) can be used to find the process.
	 */
	if (gettime && pid == task_pid(current))
		return task_tgid(current);

	/* For processes require that pid identifies a process. */
	if (!pid_has_task(pid, PIDTYPE_TGID))
		return NULL;

	return pid;
}
/*
 * Check that @clock resolves to a valid pid via pid_for_clock().
 *
 * The lookup is done under rcu_read_lock(). Returns 0 when a pid was
 * found and -EINVAL otherwise.
 */
static inline int validate_clock_permissions(const clockid_t clock)
{
	int err = -EINVAL;

	rcu_read_lock();
	if (pid_for_clock(clock, false))
		err = 0;
	rcu_read_unlock();

	return err;
}
/*
 * Map a CPU clock id to the pid type used to look up its task:
 * PIDTYPE_PID for per-thread clocks, PIDTYPE_TGID otherwise.
 */
static inline enum pid_type clock_pid_type(const clockid_t clock)
{
	if (CPUCLOCK_PERTHREAD(clock))
		return PIDTYPE_PID;

	return PIDTYPE_TGID;
}
static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)
{
return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));
}
/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*
* @timer: timer whose it.cpu.node.expires and it_overrun are advanced
* @now: current clock sample to compare the expiry against
*
* Returns the expiry time after the update. Nothing is modified when the
* timer has no interval or when @now is still before the expiry.
*/
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
u64 delta, incr, expires = timer->it.cpu.node.expires;
int i;
/* No interval configured: there is nothing to advance. */
if (!timer->it_interval)
return expires;
/* The sample has not reached the expiry yet: leave it alone. */
if (now < expires)
return expires;
incr = timer->it_interval;
/* Distance from the current expiry to one full interval past @now. */
delta = now + incr - expires;
/*
* Double incr until twice its value is no longer below delta; i counts
* the doublings, so incr == it_interval << i afterwards.
* Don't use (incr*2 < delta), incr*2 might overflow.
*/
for (i = 0; incr < delta - incr; i++)
incr = incr << 1;
/*
* Walk back down through the power-of-two multiples of the interval.
* Every multiple that still fits into the remaining delta is added to
* the expiry and accounts for 2^i overruns.
*/
for (; i >= 0; incr >>= 1, i--) {
if (delta < incr)
continue;
timer->it.cpu.node.expires += incr;
timer->it_overrun += 1LL << i;
delta -= incr;
}
return timer->it.cpu.node.expires;
}
/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
return !(~pct->bases[CPUCLOCK_PROF].nextevt |
~pct->bases[CPUCLOCK_VIRT].nextevt |
~pct->bases[CPUCLOCK_SCHED].nextevt);
}
/*
 * Report the resolution of a CPU clock.
 *
 * @which_clock: encoded CPU clock id, validated first
 * @tp:          filled in on success only
 *
 * Returns 0 on success or the error from validate_clock_permissions().
 */
static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
	int error = validate_clock_permissions(which_clock);

	if (error)
		return error;

	tp->tv_sec = 0;
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
		/*
		 * If sched_clock is using a cycle counter, we don't have
		 * any idea of its true resolution exported, but it is much
		 * finer than 1s/HZ, so report a single nanosecond.
		 */
		tp->tv_nsec = 1;
	} else {
		/* One tick, rounded up to whole nanoseconds. */
		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
	}

	return 0;
}
/*
 * Setting a CPU clock is never permitted.
 *
 * The clock id is still validated first so that an invalid clock reports
 * its own error; a valid clock always fails with -EPERM.
 */
static int
posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
	int error = validate_clock_permissions(clock);

	if (error)
		return error;

	return -EPERM;
}
/*
 * Sample a per-thread clock for the given task. clkid is validated.
 *
 * CPUCLOCK_SCHED is answered from task_sched_runtime(); the other clocks
 * are derived from the user/system times returned by task_cputime().
 * An unexpected clkid triggers a one-time warning and yields 0.
 */
static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
{
	u64 utime, stime;

	if (clkid == CPUCLOCK_SCHED)
		return task_sched_runtime(p);

	task_cputime(p, &utime, &stime);

	if (clkid == CPUCLOCK_PROF)
		return utime + stime;

	if (clkid == CPUCLOCK_VIRT)
		return utime;

	WARN_ON_ONCE(1);
	return 0;
}
/*
 * Fill the per-clock sample array from the individual time values.
 *
 * @samples: array indexed by CPUCLOCK_PROF, CPUCLOCK_VIRT and CPUCLOCK_SCHED
 * @stime:   system time
 * @utime:   user time
 * @rtime:   value stored for CPUCLOCK_SCHED
 */
static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
{
	samples[CPUCLOCK_SCHED] = rtime;
	samples[CPUCLOCK_VIRT] = utime;
	/* The profiling clock counts both user and system time. */
	samples[CPUCLOCK_PROF] = utime + stime;
}
/*
 * Sample all three CPU clocks of task @p into @samples, using
 * task_cputime() for user/system time and p->se.sum_exec_runtime for
 * the CPUCLOCK_SCHED entry.
 */
static void task_sample_cputime(struct task_struct *p, u64 *samples)
{
	u64 user, sys, runtime;

	task_cputime(p, &user, &sys);
	runtime = p->se.sum_exec_runtime;

	store_samples(samples, sys, user, runtime);
}
/*
 * Sample all three CPU clocks from the atomic counters in @at into
 * @samples. The counters are read one by one with atomic64_read().
 */
static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
				       u64 *samples)
{
	const u64 user = atomic64_read(&at->utime);
	const u64 sys = atomic64_read(&at->stime);
	const u64 runtime = atomic64_read(&at->sum_exec_runtime);

	store_samples(samples, sys, user, runtime);
}
/*
 * Raise *cputime to sum_cputime if sum_cputime is larger.
 *
 * The update is done with cmpxchg so that a concurrent change to
 * *cputime is detected; in that case the comparison is repeated against
 * the newly observed value.
 */
static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
{
	for (;;) {
		u64 seen = atomic64_read(cputime);

		/* Already at least as large: nothing to do. */
		if (sum_cputime <= seen)
			break;

		/* Done once the swap hit the value we sampled. */
		if (atomic64_cmpxchg(cputime, seen, sum_cputime) == seen)
			break;
	}
}
static void update_gt_cputime(stru
|