// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/sched/syscalls.c
*
* Core kernel scheduler syscalls related code
*
* Copyright (C) 1991-2002 Linus Torvalds
* Copyright (C) 1998-2024 Ingo Molnar, Red Hat
*/
#include <linux/sched.h>
#include <linux/cpuset.h>
#include <linux/sched/debug.h>
#include <uapi/linux/sched/types.h>
#include "sched.h"
#include "autogroup.h"
/*
 * Map a (policy, rt priority, nice) triple to a kernel priority value.
 *
 * Deadline policies always yield MAX_DL_PRIO - 1, realtime policies yield
 * MAX_RT_PRIO - 1 - @rt_prio, and every other policy yields the priority
 * that NICE_TO_PRIO() derives from @nice. Only the argument relevant to
 * the selected policy class is consulted.
 */
static inline int __normal_prio(int policy, int rt_prio, int nice)
{
	if (dl_policy(policy))
		return MAX_DL_PRIO - 1;

	if (rt_policy(policy))
		return MAX_RT_PRIO - 1 - rt_prio;

	return NICE_TO_PRIO(nice);
}
/*
 * Compute the expected normal priority of @p, i.e. its priority
 * without taking RT-inheritance into account.
 *
 * The result depends only on the task's policy, its rt_priority and
 * the nice level recovered from its static_prio; p->prio is not read.
 */
static inline int normal_prio(struct task_struct *p)
{
	int nice = PRIO_TO_NICE(p->static_prio);

	return __normal_prio(p->policy, p->rt_priority, nice);
}
/*
 * Compute the current priority, i.e. the value the scheduler should
 * actually use for @p.
 *
 * As a side effect p->normal_prio is refreshed from normal_prio().
 * The return value is p->prio when that already lies in the RT/deadline
 * range (the task is an RT/DL task or has been boosted into that range),
 * and the freshly computed p->normal_prio otherwise.
 */
static int effective_prio(struct task_struct *p)
{
	p->normal_prio = normal_prio(p);

	/*
	 * RT/DL tasks and tasks boosted to such a priority keep their
	 * current priority untouched.
	 */
	if (rt_or_dl_prio(p->prio))
		return p->prio;

	/* Everyone else simply follows the normal priority. */
	return p->normal_prio;
}
/*
* set_user_nice - change the nice value of a task
* @p: task whose nice value is changed
* @nice: requested nice value
*
* Returns without doing anything when @nice equals the task's current
* nice value or lies outside [MIN_NICE, MAX_NICE]. Otherwise
* p->static_prio is updated; for tasks that have neither a deadline nor
* an RT policy the load weight and p->prio are recomputed as well and
* the scheduling class is notified through its prio_changed() hook.
*/
void set_user_nice(struct task_struct *p, long nice)
{
bool queued, running;
struct rq *rq;
int old_prio;
/* Nothing to do for an unchanged or out-of-range nice value. */
if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
return;
/*
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*
* NOTE(review): the task_rq_lock guard is presumably released
* automatically on every return path below - confirm against the
* guard class definition.
*/
CLASS(task_rq_lock, rq_guard)(p);
rq = rq_guard.rq;
/*
* Update the clock once here; the dequeue/enqueue calls below pass
* the *_NOCLOCK flags.
*/
update_rq_clock(rq);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it won't have any effect on scheduling for as long as the task is
* SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: only static_prio is
* recorded here, p->prio and the load weight are left alone.
*/
if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
p->static_prio = NICE_TO_PRIO(nice);
return;
}
/*
* Remember whether the task is queued and/or is the rq's current
* donor, and take it out of those roles while its priority and
* weight are modified; the matching restore happens further down.
*/
queued = task_on_rq_queued(p);
running = task_current_donor(rq, p);
if (queued)
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
if (running)
put_prev_task(rq, p);
/* Apply the new nice value and derive weight and priority from it. */
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p, true);
/* Keep the previous priority for the prio_changed() callback. */
old_prio = p->prio;
p->prio = effective_prio(p);
/* Undo the dequeue/put_prev from above, in the same order. */
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
if (running)
set_next_task(rq, p);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU. The decision is
* delegated to the task's scheduling class:
*/
p->sched_class->prio_changed(rq, p, old_prio);
}
EXPORT_SYMBOL(set_user_nice);
/*
 * is_nice_reduction - check a nice value against the task's RLIMIT_NICE
 * @p: task
 * @nice: nice value
 *
 * Similar to can_nice() but does not perform a capability check.
 *
 * Returns true when the rlimit-style equivalent of @nice does not exceed
 * the RLIMIT_NICE resource limit of @p.
 */
static bool is_nice_reduction(const struct task_struct *p, const int nice)
{
	/* Nice values [19,-20] correspond to rlimit-style values [1,40]. */
	const int rlim_equiv = nice_to_rlimit(nice);

	return task_rlimit(p, RLIMIT_NICE) >= rlim_equiv;
}
/*
 * can_nice - check if a task can reduce its nice value
 * @p: task
 * @nice: nice value
 *
 * Returns nonzero when @nice passes the RLIMIT_NICE check done by
 * is_nice_reduction(), or, failing that, when capable(CAP_SYS_NICE)
 * succeeds. The capability is only queried if the rlimit check fails.
 */
int can_nice(const struct task_struct *p, const int nice)
{
	if (is_nice_reduction(p, nice))
		return 1;

	return capable(CAP_SYS_NICE);
}
#ifdef __ARCH_WANT_SYS_NICE
/*
 * sys_nice - change the priority of the current process.
 * @increment: priority increment
 *
 * sys_setpriority is a more generic, but much slower function that
 * does similar things.
 *
 * Returns 0 on success, -EPERM when a negative increment is refused by
 * can_nice(), or the nonzero result of security_task_setnice().
 */
SYSCALL_DEFINE1(nice, int, increment)
{
	long new_nice;
	long err;

	/*
	 * A concurrent setpriority call may alter our priority while we
	 * are here. That is harmless: conceptually one of the two calls
	 * happens first and there is a single winner.
	 */

	/* Limit the user-supplied increment before adding it. */
	increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH);

	new_nice = task_nice(current) + increment;
	new_nice = clamp_val(new_nice, MIN_NICE, MAX_NICE);

	/* Only a negative increment needs the permission check. */
	if (increment < 0) {
		if (!can_nice(current, new_nice))
			return -EPERM;
	}

	err = security_task_setnice(current, new_nice);
	if (!err)
		set_user_nice(current, new_nice);

	return err;
}
#endif /* __ARCH_WANT_SYS_NICE */
/**
 * task_prio - return the priority value of a given task.
 * @p: the task in question.
 *
 * The value is the task's kernel priority shifted down by MAX_RT_PRIO.
 *
 * Return: The priority value as seen by users in /proc.
 *
 * sched policy          return value    kernel prio     user prio/nice
 *
 * normal, batch, idle   [0 ... 39]      [100 ... 139]   0/[-20 ... 19]
 * fifo, rr              [-2 ... -100]   [98 ... 0]      [1 ... 99]
 * deadline              -101            -1              0
 */
int task_prio(const struct task_struct *p)
{
	const int kernel_prio = p->prio;

	return kernel_prio - MAX_RT_PRIO;
}
/**
 * idle_cpu - is a given CPU idle currently?
 * @cpu: the processor in question.
 *
 * A CPU counts as idle only if its runqueue's current task is the idle
 * task, the runqueue's nr_running is zero and its ttwu_pending is not set.
 *
 * Return: 1 if the CPU is currently idle. 0 otherwise.
 */
int idle_cpu(int cpu)
{
	struct rq *rq = cpu_rq(cpu);

	return rq->curr == rq->idle && !rq->nr_running && !rq->ttwu_pending;
}
/**
 * available_idle_cpu - is a given CPU idle for enqueuing work.
 * @cpu: the CPU in question.
 *
 * Return: 1 if idle_cpu() reports the CPU idle and vcpu_is_preempted()
 * does not report it preempted. 0 otherwise.
 */
int available_idle_cpu(int cpu)
{
	return idle_cpu(cpu) && !vcpu_is_preempted(cpu);
}
/**
* idle_task - return the idle task for a given CPU.
* @cpu: the processor in question.
*
* Return: The idle task for the CPU @cpu.
*/
struct task_struct *idle_task(int cpu)
{
return cpu_rq(cpu)->idle;
}
#ifdef CONFIG_SCHED_CORE
int sched
|