From 9b54d2017687df9fa827faf9e4022973b87fc0ff Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 11 Jan 2019 14:38:51 -0500 Subject: drm/amdkfd: add RAS ECC event support (v3) RAS ECC event will combine with GPU reset event, due to ECC interrupts are caused by uncorrectable error that triggers GPU reset. v2: Fix misleading-indentation warning v3: fix build with CONFIG_HSA_AMD disabled Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e622fd1fbd46..dc067ed0b72d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_HW_EXCEPTION_GPU_HANG 0 #define KFD_HW_EXCEPTION_ECC 1 +/* For kfd_hsa_memory_exception_data.ErrorType */ +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ @@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; __u32 gpu_id; - __u32 pad; + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ -- cgit v1.2.3 From 02ec6cafd78c2052283516afc74c309745d20271 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 19 Mar 2019 18:49:48 +0700 Subject: tipc: support broadcast/replicast configurable for bc-link Currently, a multicast stream uses either broadcast or replicast as transmission method, based on the ratio between number of actual destinations nodes and cluster size. However, when an L2 interface (e.g., VXLAN) provides pseudo broadcast support, this becomes very inefficient, as it blindly replicates multicast packets to all cluster/subnet nodes, irrespective of whether they host actual target sockets or not. The TIPC multicast algorithm is able to distinguish real destination nodes from other nodes, and hence provides a smarter and more efficient method for transferring multicast messages than pseudo broadcast can do. Because of this, we now make it possible for users to force the broadcast link to permanently switch to using replicast, irrespective of which capabilities the bearer provides, or pretend to provide. Conversely, we also make it possible to force the broadcast link to always use true broadcast. While maybe less useful in deployed systems, this may at least be useful for testing the broadcast algorithm in small clusters. We retain the current AUTOSELECT ability, i.e., to let the broadcast link automatically select which algorithm to use, and to switch back and forth between broadcast and replicast as the ratio between destination node number and cluster size changes. This remains the default method. Furthermore, we make it possible to configure the threshold ratio for such switches. The default ratio is now set to 10%, down from 25% in the earlier implementation. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST, /* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 -- cgit v1.2.3 From f295b3ae9f5927e084bd5decdff82390e3471801 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 20 Mar 2019 02:03:36 +0000 Subject: net/tls: Add support of AES128-CCM based ciphers Added support for AES128-CCM based record encryption. AES128-CCM is similar to AES128-GCM. Both of them have same salt/iv/mac size. The notable difference between the two is that while invoking AES128-CCM operation, the salt||nonce (which is passed as IV) has to be prefixed with a hardcoded value '2'. Further, CCM implementation in kernel requires IV passed in crypto_aead_request() to be full '16' bytes. Therefore, the record structure 'struct tls_rec' has been modified to reserve '16' bytes for IV. This works for both GCM and CCM based cipher. Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- include/uapi/linux/tls.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 401d6f01de6a..5b9c26753e46 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -70,6 +70,13 @@ #define TLS_CIPHER_AES_GCM_256_TAG_SIZE 16 #define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE 8 +#define TLS_CIPHER_AES_CCM_128 53 +#define TLS_CIPHER_AES_CCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_CCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_CCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define TLS_GET_RECORD_TYPE 2 @@ -94,4 +101,12 @@ struct tls12_crypto_info_aes_gcm_256 { unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aes_ccm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_CCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_CCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_CCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE]; +}; + #endif /* _UAPI_LINUX_TLS_H */ -- cgit v1.2.3 From a07b20004793d8926f78d63eb5980559f7813404 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add open_tree(2) to reference or clone a mount open_tree(dfd, pathname, flags) Returns an O_PATH-opened file descriptor or an error. dfd and pathname specify the location to open, in usual fashion (see e.g. fstatat(2)). flags should be an OR of some of the following: * AT_PATH_EMPTY, AT_NO_AUTOMOUNT, AT_SYMLINK_NOFOLLOW - same meanings as usual * OPEN_TREE_CLOEXEC - make the resulting descriptor close-on-exec * OPEN_TREE_CLONE or OPEN_TREE_CLONE | AT_RECURSIVE - instead of opening the location in question, create a detached mount tree matching the subtree rooted at location specified by dfd/pathname. With AT_RECURSIVE the entire subtree is cloned, without it - only the part within in the mount containing the location in question. In other words, the same as mount --rbind or mount --bind would've taken. The detached tree will be dissolved on the final close of obtained file. Creation of such detached trees requires the same capabilities as doing mount --bind. Signed-off-by: Al Viro Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/fcntl.h | 2 ++ include/uapi/linux/mount.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a2f8658f1c55..1d338357df8a 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -91,5 +91,7 @@ #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 3f9ec42510b0..fd7ae2e7eccf 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -55,4 +55,10 @@ #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 2db154b3ea8e14b04fee23e3fdfd5e9d17fbc6ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add move_mount(2) to move mounts around Add a move_mount() system call that will move a mount from one place to another and, in the next commit, allow to attach an unattached mount tree. The new system call looks like the following: int move_mount(int from_dfd, const char *from_path, int to_dfd, const char *to_path, unsigned int flags); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index fd7ae2e7eccf..3634e065836c 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -61,4 +61,15 @@ #define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT__MASK 0x00000077 + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 24dcb3d90a1f67fe08c68a004af37df059d74005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:33:31 +0000 Subject: vfs: syscall: Add fsopen() to prepare for superblock creation Provide an fsopen() system call that starts the process of preparing to create a superblock that will then be mountable, using an fd as a context handle. fsopen() is given the name of the filesystem that will be used: int mfd = fsopen(const char *fsname, unsigned int flags); where flags can be 0 or FSOPEN_CLOEXEC. For example: sfd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(sfd, FSCONFIG_SET_PATH, "source", "/dev/sda1", AT_FDCWD); fsconfig(sfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "acl", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0); fsconfig(sfd, FSCONFIG_SET_STRING, "sb", "1", 0); fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); fsinfo(sfd, NULL, ...); // query new superblock attributes mfd = fsmount(sfd, FSMOUNT_CLOEXEC, MS_RELATIME); move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); sfd = fsopen("afs", -1); fsconfig(fd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell", 0); fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); mfd = fsmount(sfd, 0, MS_NODEV); move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); If an error is reported at any step, an error message may be available to be read() back (ENODATA will be reported if there isn't an error available) in the form: "e :" "e SELinux:Mount on mountpoint not permitted" Once fsmount() has been called, further fsconfig() calls will incur EBUSY, even if the fsmount() fails. read() is still possible to retrieve error information. The fsopen() syscall creates a mount context and hangs it of the fd that it returns. Netlink is not used because it is optional and would make the core VFS dependent on the networking layer and also potentially add network namespace issues. Note that, for the moment, the caller must have SYS_CAP_ADMIN to use fsopen(). Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 3634e065836c..7570df43d08f 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -72,4 +72,9 @@ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ #define MOVE_MOUNT__MASK 0x00000077 +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From ecdab150fddb42fe6a739335257949220033b782 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:09 +0000 Subject: vfs: syscall: Add fsconfig() for configuring and managing a context Add a syscall for configuring a filesystem creation context and triggering actions upon it, to be used in conjunction with fsopen, fspick and fsmount. long fsconfig(int fs_fd, unsigned int cmd, const char *key, const void *value, int aux); Where fs_fd indicates the context, cmd indicates the action to take, key indicates the parameter name for parameter-setting actions and, if needed, value points to a buffer containing the value and aux can give more information for the value. The following command IDs are proposed: (*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be boolean in nature. The key may be prefixed with "no" to invert the setting. value must be NULL and aux must be 0. (*) FSCONFIG_SET_STRING: A string value is specified. The parameter can be expecting boolean, integer, string or take a path. A conversion to an appropriate type will be attempted (which may include looking up as a path). value points to a NUL-terminated string and aux must be 0. (*) FSCONFIG_SET_BINARY: A binary blob is specified. value points to the blob and aux indicates its size. The parameter must be expecting a blob. (*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must be expecting a path object. value points to a NUL-terminated string that is the path and aux is a file descriptor at which to start a relative lookup or AT_FDCWD. (*) FSCONFIG_SET_PATH_EMPTY: As fsconfig_set_path, but with AT_EMPTY_PATH implied. (*) FSCONFIG_SET_FD: An open file descriptor is specified. value must be NULL and aux indicates the file descriptor. (*) FSCONFIG_CMD_CREATE: Trigger superblock creation. (*) FSCONFIG_CMD_RECONFIGURE: Trigger superblock reconfiguration. For the "set" command IDs, the idea is that the file_system_type will point to a list of parameters and the types of value that those parameters expect to take. The core code can then do the parse and argument conversion and then give the LSM and FS a cooked option or array of options to use. Source specification is also done the same way same way, using special keys "source", "source1", "source2", etc.. [!] Note that, for the moment, the key and value are just glued back together and handed to the filesystem. Every filesystem that uses options uses match_token() and co. to do this, and this will need to be changed - but not all at once. Example usage: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_path, "source", "/dev/sda1", AT_FDCWD); fsconfig(fd, fsconfig_set_path_empty, "journal_path", "", journal_fd); fsconfig(fd, fsconfig_set_fd, "journal_fd", "", journal_fd); fsconfig(fd, fsconfig_set_flag, "user_xattr", NULL, 0); fsconfig(fd, fsconfig_set_flag, "noacl", NULL, 0); fsconfig(fd, fsconfig_set_string, "sb", "1", 0); fsconfig(fd, fsconfig_set_string, "errors", "continue", 0); fsconfig(fd, fsconfig_set_string, "data", "journal", 0); fsconfig(fd, fsconfig_set_string, "context", "unconfined_u:...", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "/dev/sda1", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("afs", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "#grand.central.org:root.cell", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("jffs2", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "mtd0", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 7570df43d08f..4b90ba9d1770 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -77,4 +77,18 @@ */ #define FSOPEN_CLOEXEC 0x00000001 +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 93766fbd2696c2c4453dd8e1070977e9cd4e6b6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:14 +0000 Subject: vfs: syscall: Add fsmount() to create a mount for a superblock Provide a system call by which a filesystem opened with fsopen() and configured by a series of fsconfig() calls can have a detached mount object created for it. This mount object can then be attached to the VFS mount hierarchy using move_mount() by passing the returned file descriptor as the from directory fd. The system call looks like: int mfd = fsmount(int fsfd, unsigned int flags, unsigned int attr_flags); where fsfd is the file descriptor returned by fsopen(). flags can be 0 or FSMOUNT_CLOEXEC. attr_flags is a bitwise-OR of the following flags: MOUNT_ATTR_RDONLY Mount read-only MOUNT_ATTR_NOSUID Ignore suid and sgid bits MOUNT_ATTR_NODEV Disallow access to device special files MOUNT_ATTR_NOEXEC Disallow program execution MOUNT_ATTR__ATIME Setting on how atime should be updated MOUNT_ATTR_RELATIME - Update atime relative to mtime/ctime MOUNT_ATTR_NOATIME - Do not update access times MOUNT_ATTR_STRICTATIME - Always perform atime updates MOUNT_ATTR_NODIRATIME Do not update directory access times In the event that fsmount() fails, it may be possible to get an error message by calling read() on fsfd. If no message is available, ENODATA will be reported. Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 4b90ba9d1770..3888d3b91dc5 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -91,4 +91,22 @@ enum fsconfig_command { FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ }; +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From cf3cba4a429be43e5527a3f78859b1bfd9ebc5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:23 +0000 Subject: vfs: syscall: Add fspick() to select a superblock for reconfiguration Provide an fspick() system call that can be used to pick an existing mountpoint into an fs_context which can thereafter be used to reconfigure a superblock (equivalent of the superblock side of -o remount). This looks like: int fd = fspick(AT_FDCWD, "/mnt", FSPICK_CLOEXEC | FSPICK_NO_AUTOMOUNT); fsconfig(fd, FSCONFIG_SET_FLAG, "intr", NULL, 0); fsconfig(fd, FSCONFIG_SET_FLAG, "noac", NULL, 0); fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0); At the point of fspick being called, the file descriptor referring to the filesystem context is in exactly the same state as the one that was created by fsopen() after fsmount() has been successfully called. Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 3888d3b91dc5..96a0240f23fe 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -77,6 +77,14 @@ */ #define FSOPEN_CLOEXEC 0x00000001 +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + /* * The type of fsconfig() call made. */ -- cgit v1.2.3 From 162f33dd45a7ed606ae07c1e3f22db4ac43a584c Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:28:39 +0300 Subject: Move EM_ARCOMPACT and EM_ARCV2 to uapi/linux/elf-em.h These should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_ARCOMPACT and EM_ARCV2 in order to define AUDIT_ARCH_ARCOMPACT and AUDIT_ARCH_ARCV2 which are needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Vineet Gupta Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Alexey Brodkin Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-snps-arc@lists.infradead.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/elf-em.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 0c3000faedba..081675ed89cb 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -34,6 +34,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ +#define EM_ARCOMPACT 93 /* ARCompact processor */ #define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ @@ -42,6 +43,7 @@ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ +#define EM_ARCV2 195 /* ARCv2 Cores */ #define EM_RISCV 243 /* RISC-V */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_CSKY 252 /* C-SKY */ -- cgit v1.2.3 From 67f2a8a29311841ba6ab9b0e2d1b8f1e9978cd84 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:28:47 +0300 Subject: arc: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Vineet Gupta Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Alexey Brodkin Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-snps-arc@lists.infradead.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index f28acd952d03..1626727bb921 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -375,6 +375,10 @@ enum { #define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_ARCOMPACT (EM_ARCOMPACT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_ARCOMPACTBE (EM_ARCOMPACT) +#define AUDIT_ARCH_ARCV2 (EM_ARCV2|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_ARCV2BE (EM_ARCV2) #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) #define AUDIT_ARCH_CRIS (EM_CRIS|__AUDIT_ARCH_LE) -- cgit v1.2.3 From a43e66478ef7a2f8a7b2823b97cdae6605d34a02 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:28:53 +0300 Subject: c6x: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Mark Salter Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Aurelien Jacquiot Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-c6x-dev@linux-c6x.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 1626727bb921..35ccf269ca3d 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -381,6 +381,8 @@ enum { #define AUDIT_ARCH_ARCV2BE (EM_ARCV2) #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) +#define AUDIT_ARCH_C6X (EM_TI_C6000|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_C6XBE (EM_TI_C6000) #define AUDIT_ARCH_CRIS (EM_CRIS|__AUDIT_ARCH_LE) #define AUDIT_ARCH_CSKY (EM_CSKY|__AUDIT_ARCH_LE) #define AUDIT_ARCH_FRV (EM_FRV) -- cgit v1.2.3 From 122a43b107420fec4c69d1bf99706cbb0da40ad9 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:29:01 +0300 Subject: h8300: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Yoshinori Sato Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 35ccf269ca3d..09994181b2d5 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -386,6 +386,7 @@ enum { #define AUDIT_ARCH_CRIS (EM_CRIS|__AUDIT_ARCH_LE) #define AUDIT_ARCH_CSKY (EM_CSKY|__AUDIT_ARCH_LE) #define AUDIT_ARCH_FRV (EM_FRV) +#define AUDIT_ARCH_H8300 (EM_H8_300) #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) #define AUDIT_ARCH_IA64 (EM_IA_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_M32R (EM_M32R) -- cgit v1.2.3 From f4780e2db06df05c7349718baf1cd26767aa90f8 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:29:08 +0300 Subject: Move EM_HEXAGON to uapi/linux/elf-em.h This should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_HEXAGON in order to define AUDIT_ARCH_HEXAGON which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 081675ed89cb..bd02325028d8 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -39,6 +39,7 @@ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ +#define EM_HEXAGON 164 /* QUALCOMM Hexagon */ #define EM_AARCH64 183 /* ARM 64 bit */ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ -- cgit v1.2.3 From d093153431dc6e5982ec77aabe31fa38d2041ac0 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:29:32 +0300 Subject: hexagon: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Richard Kuo Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-hexagon@vger.kernel.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 09994181b2d5..fc94e74c8b77 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -387,6 +387,7 @@ enum { #define AUDIT_ARCH_CSKY (EM_CSKY|__AUDIT_ARCH_LE) #define AUDIT_ARCH_FRV (EM_FRV) #define AUDIT_ARCH_H8300 (EM_H8_300) +#define AUDIT_ARCH_HEXAGON (EM_HEXAGON) #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) #define AUDIT_ARCH_IA64 (EM_IA_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_M32R (EM_M32R) -- cgit v1.2.3 From 530ff23a8e46814a1be4a516c707d3b0fb292186 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:29:48 +0300 Subject: Move EM_NDS32 to uapi/linux/elf-em.h This should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_NDS32 in order to define AUDIT_ARCH_NDS32 which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Acked-by: Vincent Chen Acked-by: Greentime Hu Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/elf-em.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index bd02325028d8..4b8df722330e 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -40,6 +40,8 @@ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ #define EM_HEXAGON 164 /* QUALCOMM Hexagon */ +#define EM_NDS32 167 /* Andes Technology compact code size + embedded RISC processor family */ #define EM_AARCH64 183 /* ARM 64 bit */ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ -- cgit v1.2.3 From fa562447e154334523daa44c0b60625d71a345f5 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:29:57 +0300 Subject: nds32: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Acked-by: Vincent Chen Acked-by: Greentime Hu Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index fc94e74c8b77..fb0529da4d49 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -401,6 +401,8 @@ enum { #define AUDIT_ARCH_MIPSEL64 (EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_MIPSEL64N32 (EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE|\ __AUDIT_ARCH_CONVENTION_MIPS64_N32) +#define AUDIT_ARCH_NDS32 (EM_NDS32|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_NDS32BE (EM_NDS32) #define AUDIT_ARCH_OPENRISC (EM_OPENRISC) #define AUDIT_ARCH_PARISC (EM_PARISC) #define AUDIT_ARCH_PARISC64 (EM_PARISC|__AUDIT_ARCH_64BIT) -- cgit v1.2.3 From 1660aac45e5b49a5ace29fb5b73254617533fcbd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:30:02 +0300 Subject: nios2: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Acked-by: Ley Foon Tan Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Ley Foon Tan Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: nios2-dev@lists.rocketboards.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index fb0529da4d49..bcc0619b046f 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -403,6 +403,7 @@ enum { __AUDIT_ARCH_CONVENTION_MIPS64_N32) #define AUDIT_ARCH_NDS32 (EM_NDS32|__AUDIT_ARCH_LE) #define AUDIT_ARCH_NDS32BE (EM_NDS32) +#define AUDIT_ARCH_NIOS2 (EM_ALTERA_NIOS2|__AUDIT_ARCH_LE) #define AUDIT_ARCH_OPENRISC (EM_OPENRISC) #define AUDIT_ARCH_PARISC (EM_PARISC) #define AUDIT_ARCH_PARISC64 (EM_PARISC|__AUDIT_ARCH_64BIT) -- cgit v1.2.3 From 03f7e6adfbd02d026817d2b0b21c8420fe58e0b3 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:30:06 +0300 Subject: Move EM_UNICORE to uapi/linux/elf-em.h This should never have been defined in the arch tree to begin with, and now uapi/linux/audit.h header is going to use EM_UNICORE in order to define AUDIT_ARCH_UNICORE which is needed to implement syscall_get_arch() which in turn is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Cc: Guan Xuetao Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 4b8df722330e..f47e853546fa 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -37,6 +37,7 @@ #define EM_ARCOMPACT 93 /* ARCompact processor */ #define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ +#define EM_UNICORE 110 /* UniCore-32 */ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ #define EM_HEXAGON 164 /* QUALCOMM Hexagon */ -- cgit v1.2.3 From b15fe94acece954feda32706e3ca7cc024999aee Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:30:11 +0300 Subject: unicore32: define syscall_get_arch() syscall_get_arch() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Acked-by: Paul Moore Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Guan Xuetao Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index bcc0619b046f..3901c51c0b93 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -424,6 +424,7 @@ enum { #define AUDIT_ARCH_TILEGX (EM_TILEGX|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE) #define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_UNICORE (EM_UNICORE|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_XTENSA (EM_XTENSA) -- cgit v1.2.3 From 0c3e0e3bb623c3735b8c9ab8aa8332f944f83a9f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Wed, 20 Mar 2019 12:16:42 +0300 Subject: tun: Add ioctl() TUNGETDEVNETNS cmd to allow obtaining real net ns of tun device In commit f2780d6d7475 "tun: Add ioctl() SIOCGSKNS cmd to allow obtaining net ns of tun device" it was missed that tun may change its net ns, while net ns of socket remains the same as it was created initially. SIOCGSKNS returns net ns of socket, so it is not suitable for obtaining net ns of device. We may have two tun devices with the same names in two net ns, and in this case it's not possible to determ, which of them fd refers to (TUNGETIFF will return the same name). This patch adds new ioctl() cmd for obtaining net ns of a device. Reported-by: Harald Albrecht Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 23a6753b37df..454ae31b93c7 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -60,6 +60,7 @@ #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) #define TUNSETCARRIER _IOW('T', 226, int) +#define TUNGETDEVNETNS _IO('T', 227) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From edbf8c01de5a104a71ed6df2bf6421ceb2836a8e Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 22 Mar 2019 09:54:01 +0800 Subject: bpf: add skc_lookup_tcp helper Allow looking up a sock_common. This gives eBPF programs access to timewait and request sockets. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929c8e537a14..fab05317f5e7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2431,6 +2431,23 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_socket to access the full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2531,7 +2548,8 @@ union bpf_attr { FN(sk_fullsock), \ FN(tcp_sock), \ FN(skb_ecn_set_ce), \ - FN(get_listener_sock), + FN(get_listener_sock), \ + FN(skc_lookup_tcp), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 399040847084a69f345e0a52fd62f04654e0fce3 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 22 Mar 2019 09:54:02 +0800 Subject: bpf: add helper to check for a valid SYN cookie Using bpf_skc_lookup_tcp it's possible to ascertain whether a packet belongs to a known connection. However, there is one corner case: no sockets are created if SYN cookies are active. This means that the final ACK in the 3WHS is misclassified. Using the helper, we can look up the listening socket via bpf_skc_lookup_tcp and then check whether a packet is a valid SYN cookie ACK. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fab05317f5e7..3c04410137d9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2448,6 +2448,21 @@ union bpf_attr { * Pointer to **struct bpf_sock**, or **NULL** in case of failure. * For sockets with reuseport option, the **struct bpf_sock** * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether iph and th contain a valid SYN cookie ACK for + * the listening socket in sk. + * + * iph points to the start of the IPv4 or IPv6 header, while + * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * + * th points to the start of the TCP header, while th_len contains + * sizeof(struct tcphdr). + * + * Return + * 0 if iph and th are a valid SYN cookie ACK, or a negative error + * otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2549,7 +2564,8 @@ union bpf_attr { FN(tcp_sock), \ FN(skb_ecn_set_ce), \ FN(get_listener_sock), \ - FN(skc_lookup_tcp), + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 35d0a06dad2220d62042fd1a91a216d17744e724 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 4 Feb 2019 14:50:14 -0600 Subject: PCI: Cleanup register definition width and whitespace Follow the file conventions of: - register offsets not indented - fields within a register indented one space - field masks use same width as register - register field values indented an additional space No functional change intended. Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 132 +++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 67 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 5c98133f2c94..f7d3e7831fa8 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * pci_regs.h - * * PCI standard defines * Copyright 1994, Drew Eckhardt * Copyright 1997--1999 Martin Mares @@ -15,7 +13,7 @@ * PCI System Design Guide * * For HyperTransport information, please consult the following manuals - * from http://www.hypertransport.org + * from http://www.hypertransport.org : * * The HyperTransport I/O Link Specification */ @@ -301,7 +299,7 @@ #define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ #define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ -/* Message Signalled Interrupts registers */ +/* Message Signalled Interrupt registers */ #define PCI_MSI_FLAGS 2 /* Message Control */ #define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ @@ -319,7 +317,7 @@ #define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ #define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ -/* MSI-X registers */ +/* MSI-X registers (in MSI-X capability) */ #define PCI_MSIX_FLAGS 2 /* Message Control */ #define PCI_MSIX_FLAGS_QSIZE 0x07FF /* Table size */ #define PCI_MSIX_FLAGS_MASKALL 0x4000 /* Mask all vectors for this function */ @@ -333,13 +331,13 @@ #define PCI_MSIX_FLAGS_BIRMASK PCI_MSIX_PBA_BIR /* deprecated */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ -/* MSI-X Table entry format */ +/* MSI-X Table entry format (in memory mapped by a BAR) */ #define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ +#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* CompactPCI Hotswap Register */ @@ -465,19 +463,19 @@ /* PCI Express capability registers */ #define PCI_EXP_FLAGS 2 /* Capabilities register */ -#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ -#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ -#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ -#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ -#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ -#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ -#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ -#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCIe to PCI/PCI-X Bridge */ -#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIe Bridge */ -#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ -#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ -#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ -#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCIe to PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIe Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ @@ -616,8 +614,8 @@ #define PCI_EXP_RTCAP 30 /* Root Capabilities */ #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ #define PCI_EXP_RTSTA 32 /* Root Status */ -#define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ -#define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ +#define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ +#define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ /* * The Device Capabilities 2, Device Status 2, Device Control 2, * Link Capabilities 2, Link Status 2, Link Control 2, @@ -637,13 +635,13 @@ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ -#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ +#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ -#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ -#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ +#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ @@ -659,11 +657,11 @@ #define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ -#define PCI_EXP_LNKCTL2_TLS 0x000f -#define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ -#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ -#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ -#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ +#define PCI_EXP_LNKCTL2_TLS 0x000f +#define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ +#define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ +#define PCI_EXP_LNKCTL2_TLS_8_0GT 0x0003 /* Supported Speed 8GT/s */ +#define PCI_EXP_LNKCTL2_TLS_16_0GT 0x0004 /* Supported Speed 16GT/s */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ @@ -752,18 +750,18 @@ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ #define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ #define PCI_ERR_ROOT_STATUS 48 -#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ -#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ -#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ -#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 /* Multiple FATAL/NONFATAL */ -#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First UNC is Fatal */ -#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ -#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ -#define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 /* Multiple FATAL/NONFATAL */ +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First UNC is Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ #define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ /* Virtual Channel */ @@ -875,12 +873,12 @@ /* Page Request Interface */ #define PCI_PRI_CTRL 0x04 /* PRI control register */ -#define PCI_PRI_CTRL_ENABLE 0x01 /* Enable */ -#define PCI_PRI_CTRL_RESET 0x02 /* Reset */ +#define PCI_PRI_CTRL_ENABLE 0x0001 /* Enable */ +#define PCI_PRI_CTRL_RESET 0x0002 /* Reset */ #define PCI_PRI_STATUS 0x06 /* PRI status register */ -#define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ -#define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ -#define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_STATUS_RF 0x0001 /* Response Failure */ +#define PCI_PRI_STATUS_UPRGI 0x0002 /* Unexpected PRG index */ +#define PCI_PRI_STATUS_STOPPED 0x0100 /* PRI Stopped */ #define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ @@ -898,16 +896,16 @@ /* Single Root I/O Virtualization */ #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ -#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_VFM 0x00000001 /* VF Migration Capable */ #define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ #define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ -#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ -#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ -#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ -#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ -#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_CTRL_VFE 0x0001 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x0002 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x0004 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x0008 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x0010 /* ARI Capable Hierarchy */ #define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ -#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_STATUS_VFM 0x0001 /* VF Migration Status */ #define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ #define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ #define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ @@ -937,13 +935,13 @@ /* Access Control Service */ #define PCI_ACS_CAP 0x04 /* ACS Capability Register */ -#define PCI_ACS_SV 0x01 /* Source Validation */ -#define PCI_ACS_TB 0x02 /* Translation Blocking */ -#define PCI_ACS_RR 0x04 /* P2P Request Redirect */ -#define PCI_ACS_CR 0x08 /* P2P Completion Redirect */ -#define PCI_ACS_UF 0x10 /* Upstream Forwarding */ -#define PCI_ACS_EC 0x20 /* P2P Egress Control */ -#define PCI_ACS_DT 0x40 /* Direct Translated P2P */ +#define PCI_ACS_SV 0x0001 /* Source Validation */ +#define PCI_ACS_TB 0x0002 /* Translation Blocking */ +#define PCI_ACS_RR 0x0004 /* P2P Request Redirect */ +#define PCI_ACS_CR 0x0008 /* P2P Completion Redirect */ +#define PCI_ACS_UF 0x0010 /* Upstream Forwarding */ +#define PCI_ACS_EC 0x0020 /* P2P Egress Control */ +#define PCI_ACS_DT 0x0040 /* Direct Translated P2P */ #define PCI_ACS_EGRESS_BITS 0x05 /* ACS Egress Control Vector Size */ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ @@ -993,9 +991,9 @@ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL 6 /* DPC control */ -#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ -#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ -#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ +#define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ +#define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ #define PCI_EXP_DPC_STATUS 8 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ -- cgit v1.2.3 From 14aa31929b724b70fb63a9b0e7877da325b25cfe Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 22 Mar 2019 14:32:54 -0400 Subject: bpf: add bpf_skb_adjust_room mode BPF_ADJ_ROOM_MAC bpf_skb_adjust_room net allows inserting room in an skb. Existing mode BPF_ADJ_ROOM_NET inserts room after the network header by pulling the skb, moving the network header forward and zeroing the new space. Add new mode BPF_ADJUST_ROOM_MAC that inserts room after the mac header. This allows inserting tunnel headers in front of the network header without having to recreate the network header in the original space, avoiding two copies. Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c04410137d9..7c8fd0647070 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1478,7 +1478,10 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). @@ -2627,6 +2630,7 @@ enum bpf_func_id { /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ -- cgit v1.2.3 From 2278f6cc151a8bef6ba0b3fe3009d14dc3c51c4a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 22 Mar 2019 14:32:55 -0400 Subject: bpf: add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_FIXED_GSO bpf_skb_adjust_room adjusts gso_size of gso packets to account for the pushed or popped header room. This is not allowed with UDP, where gso_size delineates datagrams. Add an option to avoid these updates and allow this call for datagrams. It can also be used with TCP, when MSS is known to allow headroom, e.g., through MSS clamping or route MTU. Changes v1->v2: - document flag BPF_F_ADJ_ROOM_FIXED_GSO - do not expose BPF_F_ADJ_ROOM_MASK through uapi, as it may change. Link: https://patchwork.ozlabs.org/patch/1052497/ Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7c8fd0647070..4f157d0ec571 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1486,8 +1486,10 @@ union bpf_attr { * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * There is one supported flag at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2627,6 +2629,9 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, -- cgit v1.2.3 From 868d523535c2d00b696753ece606e641a816e91e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 22 Mar 2019 14:32:56 -0400 Subject: bpf: add bpf_skb_adjust_room encap flags When pushing tunnel headers, annotate skbs in the same way as tunnel devices. For GSO packets, the network stack requires certain fields set to segment packets with tunnel headers. gro_gse_segment depends on transport and inner mac header, for instance. Add an option to pass this information. Remove the restriction on len_diff to network header length, which is too short, e.g., for GRE protocols. Changes v1->v2