// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/pipe.c
*
* Copyright (C) 1991, 1992, 1999 Linus Torvalds
*/
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include "internal.h"
/*
* New pipe buffers will be restricted to this size while the user is exceeding
* their pipe buffer quota. The general pipe use case needs at least two
* buffers: one for data yet to be read, and one for new data. If this is less
* than two, then a write to a non-empty pipe may block even if the pipe is not
* full. This can occur with GNU make jobserver or similar uses of pipes as
* semaphores: multiple processes may be waiting to write tokens back to the
* pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
*
* Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
* own risk, namely: pipe writes to non-full pipes may block until the pipe is
* emptied.
*/
#define PIPE_MIN_DEF_BUFFERS 2
/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
static unsigned int pipe_max_size = 1048576;
/* Maximum allocatable pages per user. Hard limit is unset by default, soft
* matches default values.
*/
static unsigned long pipe_user_pages_hard;
static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
/*
* We use head and tail indices that aren't masked off, except at the point of
* dereference, but rather they're allowed to wrap naturally. This means there
* isn't a dead spot in the buffer, but the ring has to be a power of two and
* <= 2^31.
* -- David Howells 2019-09-23.
*
* Reads with count = 0 should always return 0.
* -- Julian Bradfield 1999-06-07.
*
* FIFOs and Pipes now generate SIGIO for both readers and writers.
* -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
*
* pipe_read & write cleanup
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
#define cmp_int(l, r) ((l > r) - (l < r))
#ifdef CONFIG_PROVE_LOCKING
static int pipe_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
{
return cmp_int((unsigned long) a, (unsigned long) b);
}
#endif
void pipe_lock(struct pipe_inode_info *pipe)
{
if (pipe->files)
mutex_lock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_lock);
void pipe_unlock(struct pipe_inode_info *pipe)
{
if (pipe->files)
mutex_unlock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_unlock);
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);
if (pipe1 > pipe2)
swap(pipe1, pipe2);
pipe_lock(pipe1);
pipe_lock(pipe2);
}
static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
{
for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
if (pipe->tmp_page[i]) {
struct page *page = pipe->tmp_page[i];
pipe->tmp_page[i] = NULL;
return page;
}
}
return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
}
static void anon_pipe_put_page(struct pipe_inode_info *pipe,
struct page *page)
{
if (page_count(page) == 1) {
for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
if (!pipe->tmp_page[i]) {
pipe->tmp_page[i] = page;
return;
}
}
}
put_page(page);
}
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct page *page = buf->page;
anon_pipe_put_page(pipe, page);
}
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct page *page = buf->page;
if (page_count(page) != 1)
return false;
memcg_kmem_uncharge_page(page, 0);
__SetPageLocked(page);
return true;
}
/**
* generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to attempt to steal
*
* Description:
* This function attempts to steal the &struct page attached to
* @buf. If successful, this function returns 0 and returns with
* the page locked. The caller may then reuse the page for whatever
* he wishes; the typical use is insertion into a different file
* page cache.
*/
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct page *page = buf->page;
/*
* A reference of one is golden, that means that the owner of this
* page is the only one holding a reference to it. lock the page
* and return OK.
*/
if (page_count(page) == 1) {
lock_page(page);
return true;
}
return false;
}
EXPORT_SYMBOL(generic_pipe_buf_try_steal);
/**
* generic_pipe_buf_get - get a reference to a &struct pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
*
* Description:
* This function grabs an extra reference to @buf. It's used in
* the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
/**
* generic_pipe_buf_release - put a reference to a &struct pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to put a reference to
*
* Description:
* This function releases a reference to @buf.
*/
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.release = anon_pipe_buf_release,
.try_steal = anon_pipe_buf_try_steal,
.get = generic_pipe_buf_get,
};
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) };
unsigned int writers = READ_ONCE(pipe->writers);
return !pipe_empty(idx.head, idx.tail) || !writers;
}
static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
struct pipe_buffer *buf,
unsigned int tail)
{
pipe_buf_release(pipe, buf);
/*
* If the pipe has a watch_queue, we need additional protection
* by the spinlock because notifications get posted with only
* this spinlock, no mutex
*/
if (pipe_has_watch_queue(pipe)) {
spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
if (buf->flags & PIPE_BUF_FLAG_LOSS)
pipe->note_loss = true;
#endif
pipe->tail = ++tail;
spin_unlock_irq(&pipe->rd_wait.lock);
return tail;
}
/*
* Without a watch_queue, we can simply increment the tail
* without the spinlock - the mutex is enough.
*/
pipe->tail = ++tail;
return tail;
}
static ssize_t
anon_pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
size_t total_len
|