summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 20:34:42 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 20:34:42 -0800
commit21266b8df5224c4f677acf9f353eecc9094731f0 (patch)
tree53bccb221736248181993c6c1c70819b9613f3d1
parent5ab889facc6893e4a973d6ab5432550ef4f6ff09 (diff)
parent95b3cdafd7cb74414070893445a9b731793f7b55 (diff)
downloadlinux-21266b8df5224c4f677acf9f353eecc9094731f0.tar.gz
linux-21266b8df5224c4f677acf9f353eecc9094731f0.tar.bz2
linux-21266b8df5224c4f677acf9f353eecc9094731f0.zip
Merge tag 'AT_EXECVE_CHECK-v6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull AT_EXECVE_CHECK from Kees Cook: - Implement AT_EXECVE_CHECK flag to execveat(2) (Mickaël Salaün) - Implement EXEC_RESTRICT_FILE and EXEC_DENY_INTERACTIVE securebits (Mickaël Salaün) - Add selftests and samples for AT_EXECVE_CHECK (Mickaël Salaün) * tag 'AT_EXECVE_CHECK-v6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: ima: instantiate the bprm_creds_for_exec() hook samples/check-exec: Add an enlighten "inc" interpreter and 28 tests selftests: ktap_helpers: Fix uninitialized variable samples/check-exec: Add set-exec selftests/landlock: Add tests for execveat + AT_EXECVE_CHECK selftests/exec: Add 32 tests for AT_EXECVE_CHECK and exec securebits security: Add EXEC_RESTRICT_FILE and EXEC_DENY_INTERACTIVE securebits exec: Add a new AT_EXECVE_CHECK flag to execveat(2)
-rw-r--r--Documentation/userspace-api/check_exec.rst144
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--fs/exec.c20
-rw-r--r--include/linux/binfmts.h7
-rw-r--r--include/uapi/linux/audit.h1
-rw-r--r--include/uapi/linux/fcntl.h4
-rw-r--r--include/uapi/linux/securebits.h24
-rw-r--r--samples/Kconfig9
-rw-r--r--samples/Makefile1
-rw-r--r--samples/check-exec/.gitignore2
-rw-r--r--samples/check-exec/Makefile15
-rw-r--r--samples/check-exec/inc.c205
-rwxr-xr-xsamples/check-exec/run-script-ask.inc9
-rwxr-xr-xsamples/check-exec/script-ask.inc5
-rwxr-xr-xsamples/check-exec/script-exec.inc4
-rw-r--r--samples/check-exec/script-noexec.inc4
-rw-r--r--samples/check-exec/set-exec.c85
-rw-r--r--security/commoncap.c29
-rw-r--r--security/integrity/ima/ima_appraise.c27
-rw-r--r--security/integrity/ima/ima_main.c29
-rw-r--r--security/security.c10
-rw-r--r--tools/testing/selftests/exec/.gitignore4
-rw-r--r--tools/testing/selftests/exec/Makefile19
-rwxr-xr-xtools/testing/selftests/exec/check-exec-tests.sh205
-rw-r--r--tools/testing/selftests/exec/check-exec.c456
-rw-r--r--tools/testing/selftests/exec/config2
-rw-r--r--tools/testing/selftests/exec/false.c5
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh2
-rw-r--r--tools/testing/selftests/landlock/fs_test.c27
29 files changed, 1341 insertions, 14 deletions
diff --git a/Documentation/userspace-api/check_exec.rst b/Documentation/userspace-api/check_exec.rst
new file mode 100644
index 000000000000..05dfe3b56f71
--- /dev/null
+++ b/Documentation/userspace-api/check_exec.rst
@@ -0,0 +1,144 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright © 2024 Microsoft Corporation
+
+===================
+Executability check
+===================
+
+The ``AT_EXECVE_CHECK`` :manpage:`execveat(2)` flag, and the
+``SECBIT_EXEC_RESTRICT_FILE`` and ``SECBIT_EXEC_DENY_INTERACTIVE`` securebits
+are intended for script interpreters and dynamic linkers to enforce a
+consistent execution security policy handled by the kernel. See the
+`samples/check-exec/inc.c`_ example.
+
+Whether an interpreter should check these securebits or not depends on the
+security risk of running malicious scripts with respect to the execution
+environment, and whether the kernel can check if a script is trustworthy or
+not. For instance, Python scripts running on a server can use arbitrary
+syscalls and access arbitrary files. Such interpreters should then be
+enlighten to use these securebits and let users define their security policy.
+However, a JavaScript engine running in a web browser should already be
+sandboxed and then should not be able to harm the user's environment.
+
+Script interpreters or dynamic linkers built for tailored execution environments
+(e.g. hardened Linux distributions or hermetic container images) could use
+``AT_EXECVE_CHECK`` without checking the related securebits if backward
+compatibility is handled by something else (e.g. atomic update ensuring that
+all legitimate libraries are allowed to be executed). It is then recommended
+for script interpreters and dynamic linkers to check the securebits at run time
+by default, but also to provide the ability for custom builds to behave like if
+``SECBIT_EXEC_RESTRICT_FILE`` or ``SECBIT_EXEC_DENY_INTERACTIVE`` were always
+set to 1 (i.e. always enforce restrictions).
+
+AT_EXECVE_CHECK
+===============
+
+Passing the ``AT_EXECVE_CHECK`` flag to :manpage:`execveat(2)` only performs a
+check on a regular file and returns 0 if execution of this file would be
+allowed, ignoring the file format and then the related interpreter dependencies
+(e.g. ELF libraries, script's shebang).
+
+Programs should always perform this check to apply kernel-level checks against
+files that are not directly executed by the kernel but passed to a user space
+interpreter instead. All files that contain executable code, from the point of
+view of the interpreter, should be checked. However the result of this check
+should only be enforced according to ``SECBIT_EXEC_RESTRICT_FILE`` or
+``SECBIT_EXEC_DENY_INTERACTIVE.``.
+
+The main purpose of this flag is to improve the security and consistency of an
+execution environment to ensure that direct file execution (e.g.
+``./script.sh``) and indirect file execution (e.g. ``sh script.sh``) lead to
+the same result. For instance, this can be used to check if a file is
+trustworthy according to the caller's environment.
+
+In a secure environment, libraries and any executable dependencies should also
+be checked. For instance, dynamic linking should make sure that all libraries
+are allowed for execution to avoid trivial bypass (e.g. using ``LD_PRELOAD``).
+For such secure execution environment to make sense, only trusted code should
+be executable, which also requires integrity guarantees.
+
+To avoid race conditions leading to time-of-check to time-of-use issues,
+``AT_EXECVE_CHECK`` should be used with ``AT_EMPTY_PATH`` to check against a
+file descriptor instead of a path.
+
+SECBIT_EXEC_RESTRICT_FILE and SECBIT_EXEC_DENY_INTERACTIVE
+==========================================================
+
+When ``SECBIT_EXEC_RESTRICT_FILE`` is set, a process should only interpret or
+execute a file if a call to :manpage:`execveat(2)` with the related file
+descriptor and the ``AT_EXECVE_CHECK`` flag succeed.
+
+This secure bit may be set by user session managers, service managers,
+container runtimes, sandboxer tools... Except for test environments, the
+related ``SECBIT_EXEC_RESTRICT_FILE_LOCKED`` bit should also be set.
+
+Programs should only enforce consistent restrictions according to the
+securebits but without relying on any other user-controlled configuration.
+Indeed, the use case for these securebits is to only trust executable code
+vetted by the system configuration (through the kernel), so we should be
+careful to not let untrusted users control this configuration.
+
+However, script interpreters may still use user configuration such as
+environment variables as long as it is not a way to disable the securebits
+checks. For instance, the ``PATH`` and ``LD_PRELOAD`` variables can be set by
+a script's caller. Changing these variables may lead to unintended code
+executions, but only from vetted executable programs, which is OK. For this to
+make sense, the system should provide a consistent security policy to avoid
+arbitrary code execution e.g., by enforcing a write xor execute policy.
+
+When ``SECBIT_EXEC_DENY_INTERACTIVE`` is set, a process should never interpret
+interactive user commands (e.g. scripts). However, if such commands are passed
+through a file descriptor (e.g. stdin), its content should be interpreted if a
+call to :manpage:`execveat(2)` with the related file descriptor and the
+``AT_EXECVE_CHECK`` flag succeed.
+
+For instance, script interpreters called with a script snippet as argument
+should always deny such execution if ``SECBIT_EXEC_DENY_INTERACTIVE`` is set.
+
+This secure bit may be set by user session managers, service managers,
+container runtimes, sandboxer tools... Except for test environments, the
+related ``SECBIT_EXEC_DENY_INTERACTIVE_LOCKED`` bit should also be set.
+
+Here is the expected behavior for a script interpreter according to combination
+of any exec securebits:
+
+1. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0``
+
+ Always interpret scripts, and allow arbitrary user commands (default).
+
+ No threat, everyone and everything is trusted, but we can get ahead of
+ potential issues thanks to the call to :manpage:`execveat(2)` with
+ ``AT_EXECVE_CHECK`` which should always be performed but ignored by the
+ script interpreter. Indeed, this check is still important to enable systems
+ administrators to verify requests (e.g. with audit) and prepare for
+ migration to a secure mode.
+
+2. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=0``
+
+ Deny script interpretation if they are not executable, but allow
+ arbitrary user commands.
+
+ The threat is (potential) malicious scripts run by trusted (and not fooled)
+ users. That can protect against unintended script executions (e.g. ``sh
+ /tmp/*.sh``). This makes sense for (semi-restricted) user sessions.
+
+3. ``SECBIT_EXEC_RESTRICT_FILE=0`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1``
+
+ Always interpret scripts, but deny arbitrary user commands.
+
+ This use case may be useful for secure services (i.e. without interactive
+ user session) where scripts' integrity is verified (e.g. with IMA/EVM or
+ dm-verity/IPE) but where access rights might not be ready yet. Indeed,
+ arbitrary interactive commands would be much more difficult to check.
+
+4. ``SECBIT_EXEC_RESTRICT_FILE=1`` and ``SECBIT_EXEC_DENY_INTERACTIVE=1``
+
+ Deny script interpretation if they are not executable, and also deny
+ any arbitrary user commands.
+
+ The threat is malicious scripts run by untrusted users (but trusted code).
+ This makes sense for system services that may only execute trusted scripts.
+
+.. Links
+.. _samples/check-exec/inc.c:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/samples/check-exec/inc.c
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 274cc7546efc..6272bcf11296 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -35,6 +35,7 @@ Security-related interfaces
mfd_noexec
spec_ctrl
tee
+ check_exec
Devices and I/O
===============
diff --git a/fs/exec.c b/fs/exec.c
index 2f0acef8908e..d58b061c5e42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -892,7 +892,8 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
.lookup_flags = LOOKUP_FOLLOW,
};
- if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+ if ((flags &
+ ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_EXECVE_CHECK)) != 0)
return ERR_PTR(-EINVAL);
if (flags & AT_SYMLINK_NOFOLLOW)
open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
@@ -1564,6 +1565,21 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
}
bprm->interp = bprm->filename;
+ /*
+ * At this point, security_file_open() has already been called (with
+ * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will
+ * stop just after the security_bprm_creds_for_exec() call in
+ * bprm_execve(). Indeed, the kernel should not try to parse the
+ * content of the file with exec_binprm() nor change the calling
+ * thread, which means that the following security functions will not
+ * be called:
+ * - security_bprm_check()
+ * - security_bprm_creds_from_file()
+ * - security_bprm_committing_creds()
+ * - security_bprm_committed_creds()
+ */
+ bprm->is_check = !!(flags & AT_EXECVE_CHECK);
+
retval = bprm_mm_init(bprm);
if (!retval)
return bprm;
@@ -1845,7 +1861,7 @@ static int bprm_execve(struct linux_binprm *bprm)
/* Set the unchanging part of bprm->cred */
retval = security_bprm_creds_for_exec(bprm);
- if (retval)
+ if (retval || bprm->is_check)
goto out;
retval = exec_binprm(bprm);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 3305c849abd6..60d674af3080 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -44,7 +44,12 @@ struct linux_binprm {
*/
point_of_no_return:1,
/* Set when "comm" must come from the dentry. */
- comm_from_dentry:1;
+ comm_from_dentry:1,
+ /*
+ * Set by user space to check executability according to the
+ * caller's environment.
+ */
+ is_check:1;
struct file *executable; /* Executable to pass to the interpreter */
struct file *interpreter;
struct file *file;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 75e21a135483..d9a069b4a775 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -161,6 +161,7 @@
#define AUDIT_INTEGRITY_RULE 1805 /* policy rule */
#define AUDIT_INTEGRITY_EVM_XATTR 1806 /* New EVM-covered xattr */
#define AUDIT_INTEGRITY_POLICY_RULE 1807 /* IMA policy rules */
+#define AUDIT_INTEGRITY_USERSPACE 1808 /* Userspace enforced data integrity */
#define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6e6907e63bfc..a15ac2fa4b20 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -155,4 +155,8 @@
#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */
#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */
+/* Flags for execveat2(2). */
+#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution
+ would be allowed. */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h
index d6d98877ff1a..3fba30dbd68b 100644
--- a/include/uapi/linux/securebits.h
+++ b/include/uapi/linux/securebits.h
@@ -52,10 +52,32 @@
#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+/* See Documentation/userspace-api/check_exec.rst */
+#define SECURE_EXEC_RESTRICT_FILE 8
+#define SECURE_EXEC_RESTRICT_FILE_LOCKED 9 /* make bit-8 immutable */
+
+#define SECBIT_EXEC_RESTRICT_FILE (issecure_mask(SECURE_EXEC_RESTRICT_FILE))
+#define SECBIT_EXEC_RESTRICT_FILE_LOCKED \
+ (issecure_mask(SECURE_EXEC_RESTRICT_FILE_LOCKED))
+
+/* See Documentation/userspace-api/check_exec.rst */
+#define SECURE_EXEC_DENY_INTERACTIVE 10
+#define SECURE_EXEC_DENY_INTERACTIVE_LOCKED 11 /* make bit-10 immutable */
+
+#define SECBIT_EXEC_DENY_INTERACTIVE \
+ (issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
+#define SECBIT_EXEC_DENY_INTERACTIVE_LOCKED \
+ (issecure_mask(SECURE_EXEC_DENY_INTERACTIVE_LOCKED))
+
#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \
issecure_mask(SECURE_NO_SETUID_FIXUP) | \
issecure_mask(SECURE_KEEP_CAPS) | \
- issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+ issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \
+ issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \
+ issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
#define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1)
+#define SECURE_ALL_UNPRIVILEGED (issecure_mask(SECURE_EXEC_RESTRICT_FILE) | \
+ issecure_mask(SECURE_EXEC_DENY_INTERACTIVE))
+
#endif /* _UAPI_LINUX_SECUREBITS_H */
diff --git a/samples/Kconfig b/samples/Kconfig
index b288d9991d27..84a9d4e8d947 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -291,6 +291,15 @@ config SAMPLE_CGROUP
help
Build samples that demonstrate the usage of the cgroup API.
+config SAMPLE_CHECK_EXEC
+ bool "Exec secure bits examples"
+ depends on CC_CAN_LINK && HEADERS_INSTALL
+ help
+ Build a tool to easily configure SECBIT_EXEC_RESTRICT_FILE and
+ SECBIT_EXEC_DENY_INTERACTIVE, and a simple script interpreter to
+ demonstrate how they should be used with execveat(2) +
+ AT_EXECVE_CHECK.
+
source "samples/rust/Kconfig"
endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index b85fa64390c5..f988202f3a30 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,6 +3,7 @@
subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay
subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs
+subdir-$(CONFIG_SAMPLE_CHECK_EXEC) += check-exec
subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup
obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/
obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/
diff --git a/samples/check-exec/.gitignore b/samples/check-exec/.gitignore
new file mode 100644
index 000000000000..cd759a19dacd
--- /dev/null
+++ b/samples/check-exec/.gitignore
@@ -0,0 +1,2 @@
+/inc
+/set-exec
diff --git a/samples/check-exec/Makefile b/samples/check-exec/Makefile
new file mode 100644
index 000000000000..c4f08ad0f8e3
--- /dev/null
+++ b/samples/check-exec/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+userprogs-always-y := \
+ inc \
+ set-exec
+
+userccflags += -I usr/include
+
+.PHONY: all clean
+
+all:
+ $(MAKE) -C ../.. samples/check-exec/
+
+clean:
+ $(MAKE) -C ../.. M=samples/check-exec/ clean
diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c
new file mode 100644
index 000000000000..94b87569d2a2
--- /dev/null
+++ b/samples/check-exec/inc.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Very simple script interpreter that can evaluate two different commands (one
+ * per line):
+ * - "?" to initialize a counter from user's input;
+ * - "+" to increment the counter (which is set to 0 by default).
+ *
+ * See tools/testing/selftests/exec/check-exec-tests.sh and
+ * Documentation/userspace-api/check_exec.rst
+ *
+ * Copyright © 2024 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/fcntl.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+/* Returns 1 on error, 0 otherwise. */
+static int interpret_buffer(char *buffer, size_t buffer_size)
+{
+ char *line, *saveptr = NULL;
+ long long number = 0;
+
+ /* Each command is the first character of a line. */
+ saveptr = NULL;
+ line = strtok_r(buffer, "\n", &saveptr);
+ while (line) {
+ if (*line != '#' && strlen(line) != 1) {
+ fprintf(stderr, "# ERROR: Unknown string\n");
+ return 1;
+ }
+ switch (*line) {
+ case '#':
+ /* Skips shebang and comments. */
+ break;
+ case '+':
+ /* Increments and prints the number. */
+ number++;
+ printf("%lld\n", number);
+ break;
+ case '?':
+ /* Reads integer from stdin. */
+ fprintf(stderr, "> Enter new number: \n");
+ if (scanf("%lld", &number) != 1) {
+ fprintf(stderr,
+ "# WARNING: Failed to read number from stdin\n");
+ }
+ break;
+ default:
+ fprintf(stderr, "# ERROR: Unknown character '%c'\n",
+ *line);
+ return 1;
+ }
+ line = strtok_r(NULL, "\n", &saveptr);
+ }
+ return 0;
+}
+
+/* Returns 1 on error, 0 otherwise. */
+static int interpret_stream(FILE *script, char *const script_name,
+ char *const *const envp, const bool restrict_stream)
+{
+ int err;
+ char *const script_argv[] = { script_name, NULL };
+ char buf[128] = {};
+ size_t buf_size = sizeof(buf);
+
+ /*
+ * We pass a valid argv and envp to the kernel to emulate a native
+ * script execution. We must use the script file descriptor instead of
+ * the script path name to avoid race conditions.
+ */
+ err = execveat(fileno(script), "", script_argv, envp,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ if (err && restrict_stream) {
+ perror("ERROR: Script execution check");
+ return 1;
+ }
+
+ /* Reads script. */
+ buf_size = fread(buf, 1, buf_size - 1, script);
+ return interpret_buffer(buf, buf_size);
+}
+
+static void print_usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s <script.inc> | -i | -c <command>\n\n",
+ argv0);
+ fprintf(stderr, "Example:\n");
+ fprintf(stderr, " ./set-exec -fi -- ./inc -i < script-exec.inc\n");
+}
+
+int main(const int argc, char *const argv[], char *const *const envp)
+{
+ int opt;
+ char *cmd = NULL;
+ char *script_name = NULL;
+ bool interpret_stdin = false;
+ FILE *script_file = NULL;
+ int secbits;
+ bool deny_interactive, restrict_file;
+ size_t arg_nb;
+
+ secbits = prctl(PR_GET_SECUREBITS);
+ if (secbits == -1) {
+ /*
+ * This should never happen, except with a buggy seccomp
+ * filter.
+ */
+ perror("ERROR: Failed to get securebits");
+ return 1;
+ }
+
+ deny_interactive = !!(secbits & SECBIT_EXEC_DENY_INTERACTIVE);
+ restrict_file = !!(secbits & SECBIT_EXEC_RESTRICT_FILE);
+
+ while ((opt = getopt(argc, argv, "c:i")) != -1) {
+ switch (opt) {
+ case 'c':
+ if (cmd) {
+ fprintf(stderr, "ERROR: Command already set");
+ return 1;
+ }
+ cmd = optarg;
+ break;
+ case 'i':
+ interpret_stdin = true;
+ break;
+ default:
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ /* Checks that only one argument is used, or read stdin. */
+ arg_nb = !!cmd + !!interpret_stdin;
+ if (arg_nb == 0 && argc == 2) {
+ script_name = argv[1];
+ } else if (arg_nb != 1) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ if (cmd) {
+ /*
+ * Other kind of interactive interpretations should be denied
+ * as well (e.g. CLI arguments passing script snippets,
+ * environment variables interpreted as script). However, any
+ * way to pass script files should only be restricted according
+ * to restrict_file.
+ */
+ if (deny_interactive) {
+ fprintf(stderr,
+ "ERROR: Interactive interpretation denied.\n");
+ return 1;
+ }
+
+ return interpret_buffer(cmd, strlen(cmd));
+ }
+
+ if (interpret_stdin && !script_name) {
+ script_file = stdin;
+ /*
+ * As for any execve(2) call, this path may be logged by the
+ * kernel.
+ */
+ script_name = "/proc/self/fd/0";
+ /*
+ * When stdin is used, it can point to a regular file or a
+ * pipe. Restrict stdin execution according to
+ * SECBIT_EXEC_DENY_INTERACTIVE but always allow executable
+ * files (which are not considered as interactive inputs).
+ */
+ return interpret_stream(script_file, script_name, envp,
+ deny_interactive);
+ } else if (script_name && !interpret_stdin) {
+ /*
+ * In this sample, we don't pass any argument to scripts, but
+ * otherwise we would have to forge an argv with such
+ * arguments.
+ */
+ script_file = fopen(script_name, "r");
+ if (!script_file) {
+ perror("ERROR: Failed to open script");
+ return 1;
+ }
+ /*
+ * Restricts file execution according to
+ * SECBIT_EXEC_RESTRICT_FILE.
+ */
+ return interpret_stream(script_file, script_name, envp,
+ restrict_file);
+ }
+
+ print_usage(argv[0]);
+ return 1;
+}
diff --git a/samples/check-exec/run-script-ask.inc b/samples/check-exec/run-script-ask.inc
new file mode 100755
index 000000000000..8ef0fdc37266
--- /dev/null
+++ b/samples/check-exec/run-script-ask.inc
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+# SPDX-License-Identifier: BSD-3-Clause
+
+DIR="$(dirname -- "$0")"
+
+PATH="${PATH}:${DIR}"
+
+set -x
+"${DIR}/script-ask.inc"
diff --git a/samples/check-exec/script-ask.inc b/samples/check-exec/script-ask.inc
new file mode 100755
index 000000000000..720a8e649225
--- /dev/null
+++ b/samples/check-exec/script-ask.inc
@@ -0,0 +1,5 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
+?
++
diff --git a/samples/check-exec/script-exec.inc b/samples/check-exec/script-exec.inc
new file mode 100755
index 000000000000..3245cb9d8dd1
--- /dev/null
+++ b/samples/check-exec/script-exec.inc
@@ -0,0 +1,4 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
++
diff --git a/samples/check-exec/script-noexec.inc b/samples/check-exec/script-noexec.inc
new file mode 100644
index 000000000000..3245cb9d8dd1
--- /dev/null
+++ b/samples/check-exec/script-noexec.inc
@@ -0,0 +1,4 @@
+#!/usr/bin/env inc
+# SPDX-License-Identifier: BSD-3-Clause
+
++
diff --git a/samples/check-exec/set-exec.c b/samples/check-exec/set-exec.c
new file mode 100644
index 000000000000..ba86a60a20dd
--- /dev/null
+++ b/samples/check-exec/set-exec.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Simple tool to set SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE,
+ * before executing a command.
+ *
+ * Copyright © 2024 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <errno.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+static void print_usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s -f|-i -- <cmd> [args]...\n\n", argv0);
+ fprintf(stderr, "Execute a command with\n");
+ fprintf(stderr, "- SECBIT_EXEC_RESTRICT_FILE set: -f\n");
+ fprintf(stderr, "- SECBIT_EXEC_DENY_INTERACTIVE set: -i\n");
+}
+
+int main(const int argc, char *const argv[], char *const *const envp)
+{
+ const char *cmd_path;
+ char *const *cmd_argv;
+ int opt, secbits_cur, secbits_new;
+ bool has_policy = false;
+
+ secbits_cur = prctl(PR_GET_SECUREBITS);
+ if (secbits_cur == -1) {
+ /*
+ * This should never happen, except with a buggy seccomp
+ * filter.
+ */
+ perror("ERROR: Failed to get securebits");
+ return 1;
+ }
+
+ secbits_new = secbits_cur;
+ while ((opt = getopt(argc, argv, "fi")) != -1) {
+ switch (opt) {
+ case 'f':
+ secbits_new |= SECBIT_EXEC_RESTRICT_FILE |
+ SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ has_policy = true;
+ break;
+ case 'i':
+ secbits_new |= SECBIT_EXEC_DENY_INTERACTIVE |
+ SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ has_policy = true;
+ break;
+ default:
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (!argv[optind] || !has_policy) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ if (secbits_cur != secbits_new &&
+ prctl(PR_SET_SECUREBITS, secbits_new)) {
+ perror("Failed to set secure bit(s).");
+ fprintf(stderr,
+ "Hint: The running kernel may not support this feature.\n");
+ return 1;
+ }
+
+ cmd_path = argv[optind];
+ cmd_argv = argv + optind;
+ fprintf(stderr, "Executing command...\n");
+ execvpe(cmd_path, cmd_argv, envp);
+ fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path,
+ strerror(errno));
+ return 1;
+}
diff --git a/security/commoncap.c b/security/commoncap.c
index cefad323a0b1..52ea01acb453 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1302,21 +1302,38 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
& (old->securebits ^ arg2)) /*[1]*/
|| ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
|| (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
- || (cap_capable(current_cred(),
- current_cred()->user_ns,
- CAP_SETPCAP,
- CAP_OPT_NONE) != 0) /*[4]*/
/*
* [1] no changing of bits that are locked
* [2] no unlocking of locks
* [3] no setting of unsupported bits
- * [4] doing anything requires privilege (go read about
- * the "sendmail capabilities bug")
*/
)
/* cannot change a locked bit */
return -EPERM;
+ /*
+ * Doing anything requires privilege (go read about the
+ * "sendmail capabilities bug"), except for unprivileged bits.
+ * Indeed, the SECURE_ALL_UNPRIVILEGED bits are not
+ * restrictions enforced by the kernel but by user space on
+ * itself.
+ */
+ if (cap_capable(current_cred(), current_cred()->user_ns,
+ CAP_SETPCAP, CAP_OPT_NONE) != 0) {
+ const unsigned long unpriv_and_locks =
+ SECURE_ALL_UNPRIVILEGED |
+ SECURE_ALL_UNPRIVILEGED << 1;
+ const unsigned long changed = old->securebits ^ arg2;
+
+ /* For legacy reason, denies non-change. */
+ if (!changed)
+ return -EPERM;
+
+ /* Denies privileged changes. */
+ if (changed & ~unpriv_and_locks)
+ return -EPERM;
+ }
+
new = prepare_creds();
if (!new)
return -ENOMEM;
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 884a3533f7af..f435eff4667f 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/file.h>
+#include <linux/binfmts.h>
#include <linux/fs.h>
#include <linux/xattr.h>
#include <linux/magic.h>
@@ -469,6 +470,17 @@ int ima_check_blacklist(struct ima_iint_cache *iint,
return rc;
}
+static bool is_bprm_creds_for_exec(enum ima_hooks func, struct file *file)
+{
+ struct linux_binprm *bprm;
+
+ if (func == BPRM_CHECK) {
+ bprm = container_of(&file, struct linux_binprm, file);
+ return bprm->is_check;
+ }
+ return false;
+}
+
/*
* ima_appraise_measurement - appraise file measurement
*
@@ -483,6 +495,7 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
int xattr_len, const struct modsig *modsig)
{
static const char op[] = "appraise_data";
+ int audit_msgno = AUDIT_INTEGRITY_DATA;
const char *cause = "unknown";
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_backing_inode(dentry);
@@ -494,6 +507,16 @@ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint,
if (!(inode->i_opflags & IOP_XATTR) && !try_modsig)
return INTEGRITY_UNKNOWN;
+ /*
+ * Unlike any of the other LSM hooks where the kernel enforces file
+ * integrity, enforcing file integrity for the bprm_creds_for_exec()
+ * LSM hook with the AT_EXECVE_CHECK flag is left up to the discretion
+ * of the script interpreter(userspace). Differentiate kernel and
+ * userspace enforced integrity audit messages.
+ */
+ if (is_bprm_creds_for_exec(func, file))
+ audit_msgno = AUDIT_INTEGRITY_USERSPACE;
+
/* If reading the xattr failed and there's no modsig, error out. */
if (rc <= 0 && !try_modsig) {
if (rc && rc != -ENODATA)
@@ -569,7 +592,7 @@ out:
(iint->flags & IMA_FAIL_UNVERIFIABLE_SIGS))) {
status = INTEGRITY_FAIL;
cause = "unverifiable-signature";
- integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
+ integrity_audit_msg(audit_msgno, inode, filename,
op, cause, rc, 0);
} else if (status != INTEGRITY_PASS) {
/* Fix mode, but don't replace file signatures. */
@@ -589,7 +612,7 @@ out:
status = INTEGRITY_PASS;
}
- integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
+ integrity_audit_msg(audit_msgno, inode, filename,
op, cause, rc, 0);
} else {
ima_cache_flags(iint, func);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 9b87556b03a7..9f9897a7c217 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -555,6 +555,34 @@ static int ima_bprm_check(struct linux_binprm *bprm)
}
/**
+ * ima_bprm_creds_for_exec - collect/store/appraise measurement.
+ * @bprm: contains the linux_binprm structure
+ *
+ * Based on the IMA policy and the execveat(2) AT_EXECVE_CHECK flag, measure
+ * and appraise the integrity of a file to be executed by script interpreters.
+ * Unlike any of the other LSM hooks where the kernel enforces file integrity,
+ * enforcing file integrity is left up to the discretion of the script
+ * interpreter (userspace).
+ *
+ * On success return 0. On integrity appraisal error, assuming the file
+ * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
+ */
+static int ima_bprm_creds_for_exec(struct linux_binprm *bprm)
+{
+ /*
+ * As security_bprm_check() is called multiple times, both
+ * the script and the shebang interpreter are measured, appraised,
+ * and audited. Limit usage of this LSM hook to just measuring,
+ * appraising, and auditing the indirect script execution
+ * (e.g. ./sh example.sh).
+ */
+ if (!bprm->is_check)
+ return 0;
+
+ return ima_bprm_check(bprm);
+}
+
+/**
* ima_file_check - based on policy, collect/store measurement.
* @file: pointer to the file to be measured
* @mask: contains MAY_READ, MAY_WRITE, MAY_EXEC or MAY_APPEND
@@ -1174,6 +1202,7 @@ static int __init init_ima(void)
static struct security_hook_list ima_hooks[] __ro_after_init = {
LSM_HOOK_INIT(bprm_check_security, ima_bprm_check),
+ LSM_HOOK_INIT(bprm_creds_for_exec, ima_bprm_creds_for_exec),
LSM_HOOK_INIT(file_post_open, ima_file_check),
LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile),
LSM_HOOK_INIT(file_release, ima_file_free),
diff --git a/security/security.c b/security/security.c
index 39df4451455b..143561ebc3e8 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1248,6 +1248,12 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
* to 1 if AT_SECURE should be set to request libc enable secure mode. @bprm
* contains the linux_binprm structure.
*
+ * If execveat(2) is called with the AT_EXECVE_CHECK flag, bprm->is_check is
+ * set. The result must be the same as without this flag even if the execution
+ * will never really happen and @bprm will always be dropped.
+ *
+ * This hook must not change current->cred, only @bprm->cred.
+ *
* Return: Returns 0 if the hook is successful and permission is granted.
*/
int security_bprm_creds_for_exec(struct linux_binprm *bprm)
@@ -3097,6 +3103,10 @@ int security_file_receive(struct file *file)
* Save open-time permission checking state for later use upon file_permission,
* and recheck access if anything has changed since inode_permission.
*
+ * We can check if a file is opened for execution (e.g. execve(2) call), either
+ * directly or indirectly (e.g. ELF's ld.so) by checking file->f_flags &
+ * __FMODE_EXEC .
+ *
* Return: Returns 0 if permission is granted.
*/
int security_file_open(struct file *file)
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index a0dc5d4bf733..7f3d1ae762ec 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -9,9 +9,13 @@ execveat.ephemeral
execveat.denatured
non-regular
null-argv
+/check-exec
+/false
+/inc
/load_address.*
!load_address.c
/recursion-depth
+/set-exec
xxxxxxxx*
pipe
S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index ba012bc5aab9..45a3cfc435cf 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,26 +1,33 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS = -Wall
CFLAGS += -Wno-nonnull
+CFLAGS += $(KHDR_INCLUDES)
+
+LDLIBS += -lcap
ALIGNS := 0x1000 0x200000 0x1000000
ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS))
ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS))
ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES)
-TEST_PROGS := binfmt_script.py
+TEST_PROGS := binfmt_script.py check-exec-tests.sh
TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS)
+TEST_GEN_PROGS_EXTENDED := false inc set-exec script-exec.inc script-noexec.inc
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
TEST_GEN_PROGS += null-argv
+TEST_GEN_PROGS += check-exec
EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
$(OUTPUT)/S_I*.test
include ../lib.mk
+CHECK_EXEC_SAMPLES := $(top_srcdir)/samples/check-exec
+
$(OUTPUT)/subdir:
mkdir -p $@
$(OUTPUT)/script: Makefile
@@ -38,3 +45,13 @@ $(OUTPUT)/load_address.0x%: load_address.c
$(OUTPUT)/load_address.static.0x%: load_address.c
$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
-fPIE -static-pie $< -o $@
+$(OUTPUT)/false: false.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@
+$(OUTPUT)/inc: $(CHECK_EXEC_SAMPLES)/inc.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/set-exec: $(CHECK_EXEC_SAMPLES)/set-exec.c
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/script-exec.inc: $(CHECK_EXEC_SAMPLES)/script-exec.inc
+ cp $< $@
+$(OUTPUT)/script-noexec.inc: $(CHECK_EXEC_SAMPLES)/script-noexec.inc
+ cp $< $@
diff --git a/tools/testing/selftests/exec/check-exec-tests.sh b/tools/testing/selftests/exec/check-exec-tests.sh
new file mode 100755
index 000000000000..87102906ae3c
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec-tests.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the "inc" interpreter.
+#
+# See include/uapi/linux/securebits.h, include/uapi/linux/fcntl.h and
+# samples/check-exec/inc.c
+#
+# Copyright © 2024 Microsoft Corporation
+
+set -u -e -o pipefail
+
+EXPECTED_OUTPUT="1"
+exec 2>/dev/null
+
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
+exec_direct() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Updates PATH for `env` to execute the `inc` interpreter.
+ out="$(PATH="." "$@" "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for direct file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for direct file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_indirect() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Script passed as argument.
+ out="$("$@" ./inc "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for indirect file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for indirect file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_stdin_reg() {
+ local expect="$1"
+ local script="$2"
+ shift 2
+ local ret=0
+ local out
+
+ # Executing stdin must be allowed if the related file is executable.
+ out="$("$@" ./inc -i < "${script}")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for stdin regular file execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for stdin regular file execution: ${out}"
+ return 1
+ fi
+}
+
+exec_stdin_pipe() {
+ local expect="$1"
+ shift
+ local ret=0
+ local out
+
+ # A pipe is not executable.
+ out="$(cat script-exec.inc | "$@" ./inc -i)" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for stdin pipe execution: ${ret}"
+ return 1
+ fi
+}
+
+exec_argument() {
+ local expect="$1"
+ local ret=0
+ shift
+ local out
+
+ # Script not coming from a file must not be executed.
+ out="$("$@" ./inc -c "$(< script-exec.inc)")" || ret=$?
+
+ if [[ ${ret} -ne ${expect} ]]; then
+ echo "ERROR: Wrong expectation for arbitrary argument execution: ${ret}"
+ return 1
+ fi
+ if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+ echo "ERROR: Wrong output for arbitrary argument execution: ${out}"
+ return 1
+ fi
+}
+
+exec_interactive() {
+ exec_stdin_pipe "$@"
+ exec_argument "$@"
+}
+
+ktap_test() {
+ ktap_test_result "$*" "$@"
+}
+
+ktap_print_header
+ktap_set_plan 28
+
+# Without secbit configuration, nothing is changed.
+
+ktap_print_msg "By default, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc
+ktap_test exec_indirect 0 script-exec.inc
+
+ktap_print_msg "By default, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc
+
+ktap_print_msg "By default, non-executable scripts are allowed to be interpreted, but not directly executed."
+# We get 126 because of direct execution by Bash.
+ktap_test exec_direct 126 script-noexec.inc
+ktap_test exec_indirect 0 script-noexec.inc
+
+ktap_print_msg "By default, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc
+
+ktap_print_msg "By default, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0
+
+# With only file restriction: protect non-malicious users from inadvertent errors (e.g. python ~/Downloads/*.py).
+
+ktap_print_msg "With -f, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -f --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable scripts are not allowed to be executed nor interpreted."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -f --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0 ./set-exec -f --
+
+# With only denied interactive commands: check or monitor script content (e.g. with LSM).
+
+ktap_print_msg "With -i, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable scripts are allowed to be interpreted, but not directly executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -i --
+
+# With both file restriction and denied interactive commands: only allow executable scripts.
+
+ktap_print_msg "With -fi, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -fi --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable scripts are not allowed to be interpreted nor executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -fi --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -fi --
+
+ktap_finished
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
new file mode 100644
index 000000000000..4d3f4525e1e1
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test execveat(2) with AT_EXECVE_CHECK, and prctl(2) with
+ * SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their locked
+ * counterparts.
+ *
+ * Copyright © 2018-2020 ANSSI
+ * Copyright © 2024 Microsoft Corporation
+ *
+ * Author: Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <asm-generic/unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
+#include "../kselftest_harness.h"
+
+static void drop_privileges(struct __test_metadata *const _metadata)
+{
+ const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+ cap_t cap_p;
+
+ if ((cap_get_secbits() & noroot) != noroot)
+ EXPECT_EQ(0, cap_set_secbits(noroot));
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_clear(cap_p));
+
+ /*
+ * Drops everything, especially CAP_SETPCAP, CAP_DAC_OVERRIDE, and
+ * CAP_DAC_READ_SEARCH.
+ */
+ EXPECT_NE(-1, cap_set_proc(cap_p));
+ EXPECT_NE(-1, cap_free(cap_p));
+}
+
+static int test_secbits_set(const unsigned int secbits)
+{
+ int err;
+
+ err = prctl(PR_SET_SECUREBITS, secbits);
+ if (err)
+ return errno;
+ return 0;
+}
+
+FIXTURE(access)
+{
+ int memfd, pipefd;
+ int pipe_fds[2], socket_fds[2];
+};
+
+FIXTURE_VARIANT(access)
+{
+ const bool mount_exec;
+ const bool file_exec;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) {
+ /* clang-format on */
+ .mount_exec = true,
+ .file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) {
+ /* clang-format on */
+ .mount_exec = true,
+ .file_exec = false,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) {
+ /* clang-format on */
+ .mount_exec = false,
+ .file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) {
+ /* clang-format on */
+ .mount_exec = false,
+ .file_exec = false,
+};
+
+static const char binary_path[] = "./false";
+static const char workdir_path[] = "./test-mount";
+static const char reg_file_path[] = "./test-mount/regular_file";
+static const char dir_path[] = "./test-mount/directory";
+static const char block_dev_path[] = "./test-mount/block_device";
+static const char char_dev_path[] = "./test-mount/character_device";
+static const char fifo_path[] = "./test-mount/fifo";
+
+FIXTURE_SETUP(access)
+{
+ int procfd_path_size;
+ static const char path_template[] = "/proc/self/fd/%d";
+ char procfd_path[sizeof(path_template) + 10];
+
+ /* Makes sure we are not already restricted nor locked. */
+ EXPECT_EQ(0, test_secbits_set(0));
+
+ /*
+ * Cleans previous workspace if any error previously happened (don't
+ * check errors).
+ */
+ umount(workdir_path);
+ rmdir(workdir_path);
+
+ /* Creates a clean mount point. */
+ ASSERT_EQ(0, mkdir(workdir_path, 00700));
+ ASSERT_EQ(0, mount("test", workdir_path, "tmpfs",
+ MS_MGC_VAL | (variant->mount_exec ? 0 : MS_NOEXEC),
+ "mode=0700,size=9m"));
+
+ /* Creates a regular file. */
+ ASSERT_EQ(0, mknod(reg_file_path,
+ S_IFREG | (variant->file_exec ? 0700 : 0600), 0));
+ /* Creates a directory. */
+ ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0700 : 0600));
+ /* Creates a character device: /dev/null. */
+ ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3)));
+ /* Creates a block device: /dev/loop0 */
+ ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0)));
+ /* Creates a fifo. */
+ ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0600, 0));
+
+ /* Creates a regular file without user mount point. */
+ self->memfd = memfd_create("test-exec-probe", MFD_CLOEXEC);
+ ASSERT_LE(0, self->memfd);
+ /* Sets mode, which must be ignored by the exec check. */
+ ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0700 : 0600));
+
+ /* Creates a pipefs file descriptor. */
+ ASSERT_EQ(0, pipe(self->pipe_fds));
+ procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
+ path_template, self->pipe_fds[0]);
+ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+ self->pipefd = open(procfd_path, O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, self->pipefd);
+ ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0700 : 0600));
+
+ /* Creates a socket file descriptor. */
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0,
+ self->socket_fds));
+}
+
+FIXTURE_TEARDOWN_PARENT(access)
+{
+ /* There is no need to unlink the test files. */
+ EXPECT_EQ(0, umount(workdir_path));
+ EXPECT_EQ(0, rmdir(workdir_path));
+}
+
+static void fill_exec_fd(struct __test_metadata *_metadata, const int fd_out)
+{
+ char buf[1024];
+ size_t len;
+ int fd_in;
+
+ fd_in = open(binary_path, O_CLOEXEC | O_RDONLY);
+ ASSERT_LE(0, fd_in);
+ /* Cannot use copy_file_range(2) because of EXDEV. */
+ len = read(fd_in, buf, sizeof(buf));
+ EXPECT_LE(0, len);
+ while (len > 0) {
+ EXPECT_EQ(len, write(fd_out, buf, len))
+ {
+ TH_LOG("Failed to write: %s (%d)", strerror(errno),
+ errno);
+ }
+ len = read(fd_in, buf, sizeof(buf));
+ EXPECT_LE(0, len);
+ }
+ EXPECT_EQ(0, close(fd_in));
+}
+
+static void fill_exec_path(struct __test_metadata *_metadata,
+ const char *const path)
+{
+ int fd_out;
+
+ fd_out = open(path, O_CLOEXEC | O_WRONLY);
+ ASSERT_LE(0, fd_out)
+ {
+ TH_LOG("Failed to open %s: %s", path, strerror(errno));
+ }
+ fill_exec_fd(_metadata, fd_out);
+ EXPECT_EQ(0, close(fd_out));
+}
+
+static void test_exec_fd(struct __test_metadata *_metadata, const int fd,
+ const int err_code)
+{
+ char *const argv[] = { "", NULL };
+ int access_ret, access_errno;
+
+ /*
+ * If we really execute fd, filled with the "false" binary, the current
+ * thread will exits with an error, which will be interpreted by the
+ * test framework as an error. With AT_EXECVE_CHECK, we only check a
+ * potential successful execution.
+ */
+ access_ret =
+ execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ access_errno = errno;
+ if (err_code) {
+ EXPECT_EQ(-1, access_ret);
+ EXPECT_EQ(err_code, access_errno)
+ {
+ TH_LOG("Wrong error for execveat(2): %s (%d)",
+ strerror(access_errno), errno);
+ }
+ } else {
+ EXPECT_EQ(0, access_ret)
+ {
+ TH_LOG("Access denied: %s", strerror(access_errno));
+ }
+ }
+}
+
+static void test_exec_path(struct __test_metadata *_metadata,
+ const char *const path, const int err_code)
+{
+ int flags = O_CLOEXEC;
+ int fd;
+
+ /* Do not block on pipes. */
+ if (path == fifo_path)
+ flags |= O_NONBLOCK;
+
+ fd = open(path, flags | O_RDONLY);
+ ASSERT_LE(0, fd)
+ {
+ TH_LOG("Failed to open %s: %s", path, strerror(errno));
+ }
+ test_exec_fd(_metadata, fd, err_code);
+ EXPECT_EQ(0, close(fd));
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, regular_file_empty)
+{
+ const int exec = variant->mount_exec && variant->file_exec;
+
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+TEST_F(access, regular_file_elf)
+{
+ const int exec = variant->mount_exec && variant->file_exec;
+
+ fill_exec_path(_metadata, reg_file_path);
+
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, memfd_empty)
+{
+ const int exec = variant->file_exec;
+
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, memfd_elf)
+{
+ const int exec = variant->file_exec;
+
+ fill_exec_fd(_metadata, self->memfd);
+
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+ drop_privileges(_metadata);
+ test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, non_regular_files)
+{
+ test_exec_path(_metadata, dir_path, EACCES);
+ test_exec_path(_metadata, block_dev_path, EACCES);
+ test_exec_path(_metadata, char_dev_path, EACCES);
+ test_exec_path(_metadata, fifo_path, EACCES);
+ test_exec_fd(_metadata, self->socket_fds[0], EACCES);
+ test_exec_fd(_metadata, self->pipefd, EACCES);
+}
+
+/* clang-format off */
+FIXTURE(secbits) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(secbits)
+{
+ const bool is_privileged;
+ const int error;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, priv) {
+ /* clang-format on */
+ .is_privileged = true,
+ .error = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, unpriv) {
+ /* clang-format on */
+ .is_privileged = false,
+ .error = EPERM,
+};
+
+FIXTURE_SETUP(secbits)
+{
+ /* Makes sure no exec bits are set. */
+ EXPECT_EQ(0, test_secbits_set(0));
+ EXPECT_EQ(0, prctl(PR_GET_SECUREBITS));
+
+ if (!variant->is_privileged)
+ drop_privileges(_metadata);
+}
+
+FIXTURE_TEARDOWN(secbits)
+{
+}
+
+TEST_F(secbits, legacy)
+{
+ EXPECT_EQ(variant->error, test_secbits_set(0));
+}
+
+#define CHILD(...) \
+ do { \
+ pid_t child = vfork(); \
+ EXPECT_LE(0, child); \
+ if (child == 0) { \
+ __VA_ARGS__; \
+ _exit(0); \
+ } \
+ } while (0)
+
+TEST_F(secbits, exec)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+ secbits &= ~(SECBIT_EXEC_RESTRICT_FILE | SECBIT_EXEC_DENY_INTERACTIVE);
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+ CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+}
+
+TEST_F(secbits, check_locked_set)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock set but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, check_locked_unset)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock unset but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_set)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock set but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_unset)
+{
+ unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+ secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+ EXPECT_EQ(0, test_secbits_set(secbits));
+
+ /* Checks lock unset but unchanged. */
+ EXPECT_EQ(variant->error, test_secbits_set(secbits));
+ CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+ secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+ EXPECT_EQ(EPERM, test_secbits_set(0));
+ CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/exec/config b/tools/testing/selftests/exec/config
new file mode 100644
index 000000000000..c308079867b3
--- /dev/null
+++ b/tools/testing/selftests/exec/config
@@ -0,0 +1,2 @@
+CONFIG_BLK_DEV=y
+CONFIG_BLK_DEV_LOOP=y
diff --git a/tools/testing/selftests/exec/false.c b/tools/testing/selftests/exec/false.c
new file mode 100644
index 000000000000..104383ec3a79
--- /dev/null
+++ b/tools/testing/selftests/exec/false.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+int main(void)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index 8fa84b521811..32dbfe9da2c4 100644
--- a/tools/testing/selftests/kselftest/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -41,7 +41,7 @@ ktap_skip_all() {
__ktap_test() {
result="$1"
description="$2"
- directive="$3" # optional
+ directive="${3:-}" # optional
local directive_str=
[ ! -z "$directive" ] && directive_str="# $directive"
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 8ac9aaf38eaa..2af86bd796ba 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -37,6 +37,10 @@
#include <linux/fs.h>
#include <linux/mount.h>
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
#include "common.h"
#ifndef renameat2
@@ -2014,6 +2018,22 @@ static void test_execute(struct __test_metadata *const _metadata, const int err,
};
}
+static void test_check_exec(struct __test_metadata *const _metadata,
+ const int err, const char *const path)
+{
+ int ret;
+ char *const argv[] = { (char *)path, NULL };
+
+ ret = execveat(AT_FDCWD, path, argv, NULL,
+ AT_EMPTY_PATH | AT_EXECVE_CHECK);
+ if (err) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(errno, err);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
TEST_F_FORK(layout1, execute)
{
const struct rule rules[] = {
@@ -2031,20 +2051,27 @@ TEST_F_FORK(layout1, execute)
copy_file(_metadata, bin_true, file1_s1d2);
copy_file(_metadata, bin_true, file1_s1d3);
+ /* Checks before file1_s1d1 being denied. */
+ test_execute(_metadata, 0, file1_s1d1);
+ test_check_exec(_metadata, 0, file1_s1d1);
+
enforce_ruleset(_metadata, ruleset_fd);
ASSERT_EQ(0, close(ruleset_fd));
ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
test_execute(_metadata, EACCES, file1_s1d1);
+ test_check_exec(_metadata, EACCES, file1_s1d1);
ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
test_execute(_metadata, 0, file1_s1d2);
+ test_check_exec(_metadata, 0, file1_s1d2);
ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
test_execute(_metadata, 0, file1_s1d3);
+ test_check_exec(_metadata, 0, file1_s1d3);
}
TEST_F_FORK(layout1, umount_sandboxer)