From 7cd4c5c2101cb092db00f61f69d24380cf7a0ee8 Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 15 Aug 2022 11:20:25 -0500 Subject: security, lsm: Introduce security_create_user_ns() User namespaces are an effective tool to allow programs to run with permission without requiring the need for a program to run as root. User namespaces may also be used as a sandboxing technique. However, attackers sometimes leverage user namespaces as an initial attack vector to perform some exploit. [1,2,3] While it is not the unprivileged user namespace functionality, which causes the kernel to be exploitable, users/administrators might want to more granularly limit or at least monitor how various processes use this functionality, while vulnerable kernel subsystems are being patched. Preventing user namespace already creation comes in a few of forms in order of granularity: 1. /proc/sys/user/max_user_namespaces sysctl 2. Distro specific patch(es) 3. CONFIG_USER_NS To block a task based on its attributes, the LSM hook cred_prepare is a decent candidate for use because it provides more granular control, and it is called before create_user_ns(): cred = prepare_creds() security_prepare_creds() call_int_hook(cred_prepare, ... if (cred) create_user_ns(cred) Since security_prepare_creds() is meant for LSMs to copy and prepare credentials, access control is an unintended use of the hook. [4] Further, security_prepare_creds() will always return a ENOMEM if the hook returns any non-zero error code. This hook also does not handle the clone3 case which requires us to access a user space pointer to know if we're in the CLONE_NEW_USER call path which may be subject to a TOCTTOU attack. Lastly, cred_prepare is called in many call paths, and a targeted hook further limits the frequency of calls which is a beneficial outcome. Therefore introduce a new function security_create_user_ns() with an accompanying userns_create LSM hook. With the new userns_create hook, users will have more control over the observability and access control over user namespace creation. Users should expect that normal operation of user namespaces will behave as usual, and only be impacted when controls are implemented by users or administrators. This hook takes the prepared creds for LSM authors to write policy against. On success, the new namespace is applied to credentials, otherwise an error is returned. Links: 1. https://nvd.nist.gov/vuln/detail/CVE-2022-0492 2. https://nvd.nist.gov/vuln/detail/CVE-2022-25636 3. https://nvd.nist.gov/vuln/detail/CVE-2022-34918 4. https://lore.kernel.org/all/1c4b1c0d-12f6-6e9e-a6a3-cdce7418110c@schaufler-ca.com/ Reviewed-by: Christian Brauner (Microsoft) Reviewed-by: KP Singh Signed-off-by: Frederick Lawler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 4 ++++ include/linux/security.h | 6 ++++++ 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 806448173033..aa7272e83626 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) +LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, u32 *secid) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84a0d7e02176..2e11a2a22ed1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -806,6 +806,10 @@ * security attributes, e.g. for /proc/pid inodes. * @p contains the task_struct for the task. * @inode contains the inode structure for the inode. + * @userns_create: + * Check permission prior to creating a new user namespace. + * @cred points to prepared creds. + * Return 0 if successful, otherwise < 0 error code. * * Security hooks for Netlink messaging. * diff --git a/include/linux/security.h b/include/linux/security.h index 1bc362cb413f..767802fe9bfa 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); +int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); int security_msg_msg_alloc(struct msg_msg *msg); @@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2, static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { } +static inline int security_create_user_ns(const struct cred *cred) +{ + return 0; +} + static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { -- cgit v1.2.3 From 3fd6d6e2b4e80fe45bfd1c8f01dff7d30a0f9b53 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 5 Aug 2022 00:43:17 +0200 Subject: thermal/of: Rework the thermal device tree initialization The following changes are reworking entirely the thermal device tree initialization. The old version is kept until the different drivers using it are converted to the new API. The old approach creates the different actors independently. This approach is the source of the code duplication in the thermal OF because a thermal zone is created but a sensor is registered after. The thermal zones are created unconditionnaly with a fake sensor at init time, thus forcing to provide fake ops and store all the thermal zone related information in duplicated structures. Then the sensor is initialized and the code looks up the thermal zone name using the device tree. Then the sensor is associated to the thermal zone, and the sensor specific ops are called with a second level of indirection from the thermal zone ops. When a sensor is removed (with a module unload), the thermal zone stays there with the fake sensor. The cooling device associated with a thermal zone and a trip point is stored in a list, again duplicating information, using the node name of the device tree to match afterwards the cooling devices. The new approach is simpler, it creates a thermal zone when the sensor is registered and destroys it when the sensor is removed. All the matching between the cooling device, trip points and thermal zones are done using the device tree, as well as bindings. The ops are no longer specific but uses the generic ones provided by the thermal framework. When the old code won't have any users, it can be removed and the remaining thermal OF code will be much simpler. Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220804224349.1926752-2-daniel.lezcano@linexp.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1386c713885d..e2ac9d473bd6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -325,6 +325,16 @@ struct thermal_zone_of_device_ops { /* Function declarations */ #ifdef CONFIG_THERMAL_OF +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops); + +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + +void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz); + int thermal_zone_of_get_sensor_id(struct device_node *tz_np, struct device_node *sensor_np, u32 *id); @@ -366,6 +376,14 @@ static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( return ERR_PTR(-ENODEV); } +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz) +{ +} + +static inline void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz) +{ +} + static inline void devm_thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz) -- cgit v1.2.3 From f59ac19b7f44cab23df84810e2da5f57bdd3a7e7 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 5 Aug 2022 00:43:49 +0200 Subject: thermal/of: Remove old OF code All the drivers are converted to the new OF API, remove the old OF code. Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20220804224349.1926752-34-daniel.lezcano@linexp.org Signed-off-by: Daniel Lezcano --- include/linux/thermal.h | 77 ++++++++++--------------------------------------- 1 file changed, 15 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e2ac9d473bd6..86c24ddd5985 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -296,33 +296,6 @@ struct thermal_zone_params { int offset; }; -/** - * struct thermal_zone_of_device_ops - callbacks for handling DT based zones - * - * Mandatory: - * @get_temp: a pointer to a function that reads the sensor temperature. - * - * Optional: - * @get_trend: a pointer to a function that reads the sensor temperature trend. - * @set_trips: a pointer to a function that sets a temperature window. When - * this window is left the driver must inform the thermal core via - * thermal_zone_device_update. - * @set_emul_temp: a pointer to a function that sets sensor emulated - * temperature. - * @set_trip_temp: a pointer to a function that sets the trip temperature on - * hardware. - * @change_mode: a pointer to a function that notifies the thermal zone - * mode change. - */ -struct thermal_zone_of_device_ops { - int (*get_temp)(void *, int *); - int (*get_trend)(void *, int, enum thermal_trend *); - int (*set_trips)(void *, int, int); - int (*set_emul_temp)(void *, int); - int (*set_trip_temp)(void *, int, int); - int (*change_mode) (void *, enum thermal_device_mode); -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, @@ -335,61 +308,41 @@ void thermal_of_zone_unregister(struct thermal_zone_device *tz); void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz); +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + int thermal_zone_of_get_sensor_id(struct device_node *tz_np, struct device_node *sensor_np, u32 *id); -struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); -struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); #else - -static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id) -{ - return -ENOENT; -} -static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) -{ - return ERR_PTR(-ENODEV); -} - static inline -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops) { + return ERR_PTR(-ENOTSUPP); } -static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops) { - return ERR_PTR(-ENODEV); + return ERR_PTR(-ENOTSUPP); } static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz) { } -static inline void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz) +static inline void devm_thermal_of_zone_unregister(struct device *dev, + struct thermal_zone_device *tz) { } -static inline -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, + struct device_node *sensor_np, + u32 *id) { + return -ENOENT; } - #endif #ifdef CONFIG_THERMAL -- cgit v1.2.3 From 272ac1500372183ffd54b0c9f43f52afc482e610 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Aug 2022 11:45:01 -0700 Subject: fscrypt: remove fscrypt_set_test_dummy_encryption() Now that all its callers have been converted to fscrypt_parse_test_dummy_encryption() and fscrypt_add_test_dummy_key() instead, fscrypt_set_test_dummy_encryption() can be removed. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220513231605.175121-6-ebiggers@kernel.org --- include/linux/fscrypt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7d2f1e0f23b1..b95b8601b9c1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -295,8 +295,6 @@ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); -int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, - struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline bool -- cgit v1.2.3 From 45e9aa1fdbb2ebafec88c64bc53fe45cf8935b49 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Aug 2022 18:14:22 +0200 Subject: ACPI: Rename acpi_bus_get/put_acpi_device() Because acpi_bus_get_acpi_device() is completely analogous to acpi_fetch_acpi_dev(), rename it to acpi_get_acpi_dev() and add a kerneldoc comment to it. Accordingly, rename acpi_bus_put_acpi_device() to acpi_put_acpi_dev() and update all of the users of these two functions. While at it, move the acpi_fetch_acpi_dev() header next to the acpi_get_acpi_dev() header in the header file holding them. Signed-off-by: Rafael J. Wysocki Acked-by: Guenter Roeck Reviewed-by: Punit Agrawal --- include/acpi/acpi_bus.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index e7d27373ff71..7ff067a5a3bd 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -512,7 +512,6 @@ extern int unregister_acpi_notifier(struct notifier_block *); * External Functions */ -struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); @@ -767,9 +766,10 @@ static inline void acpi_dev_put(struct acpi_device *adev) put_device(&adev->dev); } -struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); +struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); +struct acpi_device *acpi_get_acpi_dev(acpi_handle handle); -static inline void acpi_bus_put_acpi_device(struct acpi_device *adev) +static inline void acpi_put_acpi_dev(struct acpi_device *adev) { acpi_dev_put(adev); } -- cgit v1.2.3 From 7a3de7324c2b1299a4f595bb6aa503c878ad7d75 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:24 -0400 Subject: fs: dlm: trace user space callbacks This patch adds trace callbacks for user locks. Unfortenately user locks are handled in a different way than kernel locks in some cases. User locks never call the dlm_lock()/dlm_unlock() kernel API and use the next step internal API of dlm. Adding those traces from user API callers should make it possible for dlm trace system to see lock handling for user locks as well. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/trace/events/dlm.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h index bad21222130e..18575206295f 100644 --- a/include/trace/events/dlm.h +++ b/include/trace/events/dlm.h @@ -92,9 +92,10 @@ TRACE_EVENT(dlm_lock_start, TRACE_EVENT(dlm_lock_end, TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name, - unsigned int namelen, int mode, __u32 flags, int error), + unsigned int namelen, int mode, __u32 flags, int error, + bool kernel_lock), - TP_ARGS(ls, lkb, name, namelen, mode, flags, error), + TP_ARGS(ls, lkb, name, namelen, mode, flags, error, kernel_lock), TP_STRUCT__entry( __field(__u32, ls_id) @@ -113,6 +114,7 @@ TRACE_EVENT(dlm_lock_end, __entry->lkb_id = lkb->lkb_id; __entry->mode = mode; __entry->flags = flags; + __entry->error = error; r = lkb->lkb_resource; if (r) @@ -122,14 +124,14 @@ TRACE_EVENT(dlm_lock_end, memcpy(__get_dynamic_array(res_name), name, __get_dynamic_array_len(res_name)); - /* return value will be zeroed in those cases by dlm_lock() - * we do it here again to not introduce more overhead if - * trace isn't running and error reflects the return value. - */ - if (error == -EAGAIN || error == -EDEADLK) - __entry->error = 0; - else - __entry->error = error; + if (kernel_lock) { + /* return value will be zeroed in those cases by dlm_lock() + * we do it here again to not introduce more overhead if + * trace isn't running and error reflects the return value. + */ + if (error == -EAGAIN || error == -EDEADLK) + __entry->error = 0; + } ), -- cgit v1.2.3 From 12cda13cfd5310bbfefdfe32a82489228e2e0381 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:25 -0400 Subject: fs: dlm: remove DLM_LSFL_FS from uapi The DLM_LSFL_FS flag is set in lockspaces created directly for a kernel user, as opposed to those lockspaces created for user space applications. The user space libdlm allowed this flag to be set for lockspaces created from user space, but then used by a kernel user. No kernel user has ever used this method, so remove the ability to do it. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 3 --- include/uapi/linux/dlm.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index ff951e9f6f20..f5f55c2138ae 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -56,9 +56,6 @@ struct dlm_lockspace_ops { * DLM_LSFL_TIMEWARN * The dlm should emit netlink messages if locks have been waiting * for a configurable amount of time. (Unused.) - * DLM_LSFL_FS - * The lockspace user is in the kernel (i.e. filesystem). Enables - * direct bast/cast callbacks. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. * diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index 0d2eca287567..1923f4f3b05e 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -69,7 +69,6 @@ struct dlm_lksb { /* dlm_new_lockspace() flags */ #define DLM_LSFL_TIMEWARN 0x00000002 -#define DLM_LSFL_FS 0x00000004 #define DLM_LSFL_NEWEXCL 0x00000008 -- cgit v1.2.3 From 56171e0db23a5e0edce1596dd2792b95ffe57bd3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:27 -0400 Subject: fs: dlm: const void resource name parameter The resource name parameter should never be changed by DLM so we declare it as const. At some point it is handled as a char pointer, a resource name can be a non printable ascii string as well. This patch change it to handle it as void pointer as it is offered by DLM API. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 2 +- include/trace/events/dlm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index f5f55c2138ae..c6bc2b5ee7e6 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -131,7 +131,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*lockast) (void *astarg), diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h index 18575206295f..da0eaae98fa3 100644 --- a/include/trace/events/dlm.h +++ b/include/trace/events/dlm.h @@ -49,7 +49,7 @@ /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */ TRACE_EVENT(dlm_lock_start, - TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name, + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, unsigned int namelen, int mode, __u32 flags), TP_ARGS(ls, lkb, name, namelen, mode, flags), @@ -91,7 +91,7 @@ TRACE_EVENT(dlm_lock_start, TRACE_EVENT(dlm_lock_end, - TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name, + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, unsigned int namelen, int mode, __u32 flags, int error, bool kernel_lock), -- cgit v1.2.3 From 62fcb99bdf10fed34b4fe6e225489fe4be2d0536 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Aug 2022 18:59:48 +0200 Subject: ACPI: Drop parent field from struct acpi_device The parent field in struct acpi_device is, in fact, redundant, because the dev.parent field in it effectively points to the same object and it is used by the driver core. Accordingly, the parent field can be dropped from struct acpi_device and for this purpose define acpi_dev_parent() to retrieve a parent struct acpi_device pointer from the dev.parent field in struct acpi_device. Next, update all of the users of the parent field in struct acpi_device to use acpi_dev_parent() instead of it and drop it. While at it, drop the ACPI_IS_ROOT_DEVICE() macro that is only used in one place in a confusing way. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Mark Brown Acked-by: Mika Westerberg Acked-by: Wei Liu Reviewed-by: Punit Agrawal --- include/acpi/acpi_bus.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7ff067a5a3bd..6289020fdab8 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -365,7 +365,6 @@ struct acpi_device { int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; - struct acpi_device *parent; struct list_head wakeup_list; struct list_head del_list; struct acpi_device_status status; @@ -458,6 +457,14 @@ static inline void *acpi_driver_data(struct acpi_device *d) #define to_acpi_device(d) container_of(d, struct acpi_device, dev) #define to_acpi_driver(d) container_of(d, struct acpi_driver, drv) +static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) +{ + if (adev->dev.parent) + return to_acpi_device(adev->dev.parent); + + return NULL; +} + static inline void acpi_set_device_status(struct acpi_device *adev, u32 sta) { *((u32 *)&adev->status) = sta; @@ -478,6 +485,7 @@ void acpi_initialize_hp_context(struct acpi_device *adev, /* acpi_device.dev.bus == &acpi_bus_type */ extern struct bus_type acpi_bus_type; +struct acpi_device *acpi_dev_parent(struct acpi_device *adev); int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); -- cgit v1.2.3 From c249ea9b4309cf3250c5bbb42a05d38d0ed9071c Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:32 -0700 Subject: Bluetooth: Move Adv Instance timer to hci_sync The Advertising Instance expiration timer adv_instance_expire was handled with the deprecated hci_request mechanism, rather than it's replacement: hci_sync. Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 3843f5060c73..aea950440b9d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -72,7 +72,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); - +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); int hci_update_passive_scan_sync(struct hci_dev *hdev); int hci_update_passive_scan(struct hci_dev *hdev); int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); -- cgit v1.2.3 From 3fe318ee72c54506534f51b4b4dfb19e0e0df2db Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:34 -0700 Subject: Bluetooth: move hci_get_random_address() to hci_sync This function has no dependencies on the deprecated hci_request mechanism, so has been moved unchanged to hci_sync.c Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index aea950440b9d..b6b975c2ed3e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -16,6 +16,7 @@ struct hci_cmd_sync_work_entry { hci_cmd_sync_work_destroy_t destroy; }; +struct adv_info; /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. @@ -51,6 +52,10 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_name_sync(struct hci_dev *hdev); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); +int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, + bool use_rpa, struct adv_info *adv_instance, + u8 *own_addr_type, bdaddr_t *rand_addr); + int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type); -- cgit v1.2.3 From 651cd3d65b0f76a2198fcf3a80ce5d53dd267717 Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Fri, 5 Aug 2022 16:42:35 -0700 Subject: Bluetooth: convert hci_update_adv_data to hci_sync hci_update_adv_data() is called from hci_event and hci_core due to events from the controller. The prior function used the deprecated hci_request method, and the new one uses hci_sync.c Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index b6b975c2ed3e..17f5a4c32f36 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -61,6 +61,7 @@ int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data(struct hci_dev *hdev, u8 instance); int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force); -- cgit v1.2.3 From 5182a5d48c3d1992b2db8748f96914e07eee0956 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:58 +0300 Subject: net: allow storing xfrm interface metadata in metadata_dst XFRM interfaces provide the association of various XFRM transformations to a netdevice using an 'if_id' identifier common to both the XFRM data structures (polcies, states) and the interface. The if_id is configured by the controlling entity (usually the IKE daemon) and can be used by the administrator to define logical relations between different connections. For example, different connections can share the if_id identifier so that they pass through the same interface, . However, currently it is not possible for connections using a different if_id to use the same interface while retaining the logical separation between them, without using additional criteria such as skb marks or different traffic selectors. When having a large number of connections, it is useful to have a the logical separation offered by the if_id identifier but use a single network interface. Similar to the way collect_md mode is used in IP tunnels. This patch attempts to enable different configuration mechanisms - such as ebpf programs, LWT encapsulations, and TC - to attach metadata to skbs which would carry the if_id. This way a single xfrm interface in collect_md mode can demux traffic based on this configuration on tx and provide this metadata on rx. The XFRM metadata is somewhat similar to ip tunnel metadata in that it has an "id", and shares similar configuration entities (bpf, tc, ...), however, it does not necessarily represent an IP tunnel or use other ip tunnel information, and also has an optional "link" property which can be used for affecting underlying routing decisions. Additional xfrm related criteria may also be added in the future. Therefore, a new metadata type is introduced, to be used in subsequent patches in the xfrm interface and configuration entities. Reviewed-by: Nikolay Aleksandrov Reviewed-by: Nicolas Dichtel Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/dst_metadata.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index adab27ba1ecb..e4b059908cc7 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -9,6 +9,7 @@ enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, + METADATA_XFRM, }; struct hw_port_info { @@ -16,12 +17,18 @@ struct hw_port_info { u32 port_id; }; +struct xfrm_md_info { + u32 if_id; + int link; +}; + struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; + struct xfrm_md_info xfrm_info; } u; }; @@ -53,6 +60,16 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + if (md_dst && md_dst->type == METADATA_XFRM) + return &md_dst->u.xfrm_info; + + return NULL; +} + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -82,6 +99,9 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); + case METADATA_XFRM: + return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, + sizeof(a->u.xfrm_info)); default: return 1; } -- cgit v1.2.3 From abc340b38ba25cd6c7aa2c0bd9150d30738c82d0 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:59 +0300 Subject: xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 11 +++++++++-- include/uapi/linux/if_link.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6e8fa98f786f..28b988577ed2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -312,9 +312,15 @@ struct km_event { struct net *net; }; +struct xfrm_if_decode_session_result { + struct net *net; + u32 if_id; +}; + struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb, - unsigned short family); + bool (*decode_session)(struct sk_buff *skb, + unsigned short family, + struct xfrm_if_decode_session_result *res); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); @@ -985,6 +991,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifyer */ + bool collect_md; }; struct xfrm_if { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..d96f13a42589 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -694,6 +694,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; -- cgit v1.2.3 From 2c2493b9da9166478fe072e3054f8a5741dadb02 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:47:00 +0300 Subject: xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in collect_md mode Allow specifying the xfrm interface if_id and link as part of a route metadata using the lwtunnel infrastructure. This allows for example using a single xfrm interface in collect_md mode as the target of multiple routes each specifying a different if_id. With the appropriate changes to iproute2, considering an xfrm device ipsec1 in collect_md mode one can for example add a route specifying an if_id like so: ip route add dev ipsec1 encap xfrm if_id 1 In which case traffic routed to the device via this route would use if_id in the xfrm interface policy lookup. Or in the context of vrf, one can also specify the "link" property: ip route add dev ipsec1 encap xfrm if_id 1 link_dev eth15 Note: LWT_XFRM_LINK uses NLA_U32 similar to IFLA_XFRM_LINK even though internally "link" is signed. This is consistent with other _LINK attributes in other devices as well as in bpf and should not have an effect as device indexes can't be negative. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/dst_metadata.h | 11 +++++++++++ include/uapi/linux/lwtunnel.h | 10 ++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index e4b059908cc7..57f75960fa28 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -60,13 +60,24 @@ skb_tunnel_info(const struct sk_buff *skb) return NULL; } +static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) +{ + return (struct xfrm_md_info *)lwt->data; +} + static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; + dst = skb_dst(skb); + if (dst && dst->lwtstate && + dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) + return lwt_xfrm_info(dst->lwtstate); + return NULL; } diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 2e206919125c..229655ef792f 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -15,6 +15,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, LWTUNNEL_ENCAP_IOAM6, + LWTUNNEL_ENCAP_XFRM, __LWTUNNEL_ENCAP_MAX, }; @@ -111,4 +112,13 @@ enum { #define LWT_BPF_MAX_HEADROOM 256 +enum { + LWT_XFRM_UNSPEC, + LWT_XFRM_IF_ID, + LWT_XFRM_LINK, + __LWT_XFRM_MAX, +}; + +#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1) + #endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From bc12b70f7d216b36bd87701349374a13e486f8eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Aug 2022 13:36:40 +0200 Subject: x86/earlyprintk: Clean up pciserial While working on a GRUB patch to support PCI-serial, a number of cleanups were suggested that apply to the code I took inspiration from. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Bjorn Helgaas # pci_ids.h Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/YwdeyCEtW+wa+QhH@worktop.programming.kicks-ass.net --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6feade66efdb..41b3fffdbb8e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -75,6 +75,9 @@ #define PCI_CLASS_COMMUNICATION_MODEM 0x0703 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +/* Interface for SERIAL/MODEM */ +#define PCI_SERIAL_16550_COMPATIBLE 0x02 + #define PCI_BASE_CLASS_SYSTEM 0x08 #define PCI_CLASS_SYSTEM_PIC 0x0800 #define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 -- cgit v1.2.3 From e3b9b27865c45c771f95b5dcf70ee8e88b343c75 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 29 Aug 2022 17:53:19 +0200 Subject: ACPI: Drop redundant acpi_dev_parent() header Because acpi_dev_parent() is defined as static inline, the extra header of it in acpi_bus.h is redundant, so drop it. Fixes: 62fcb99bdf10 ("ACPI: Drop parent field from struct acpi_device") Reported-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo --- include/acpi/acpi_bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 6289020fdab8..42f76f2c2d49 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -485,7 +485,6 @@ void acpi_initialize_hp_context(struct acpi_device *adev, /* acpi_device.dev.bus == &acpi_bus_type */ extern struct bus_type acpi_bus_type; -struct acpi_device *acpi_dev_parent(struct acpi_device *adev); int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); -- cgit v1.2.3 From 6b70fe0601adb1396ad0b85cdf05d217500b49e7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Aug 2022 14:38:42 +0200 Subject: acl: add vfs_set_acl_prepare() Various filesystems store POSIX ACLs on the backing store in their uapi format. Such filesystems need to translate from the uapi POSIX ACL format into the VFS format during i_op->get_acl(). The VFS provides the posix_acl_from_xattr() helper for this task. But the usage of posix_acl_from_xattr() is currently ambiguous. It is intended to transform from a uapi POSIX ACL to the VFS represenation. For example, when retrieving POSIX ACLs for permission checking during lookup or when calling getxattr() to retrieve system.posix_acl_{access,default}. Calling posix_acl_from_xattr() during i_op->get_acl() will map the raw {g,u}id values stored as ACL_{GROUP,USER} entries in the uapi POSIX ACL format into k{g,u}id_t in the filesystem's idmapping and return a struct posix_acl ready to be returned to the VFS for caching and to perform permission checks on. However, posix_acl_from_xattr() is also called during setxattr() for all filesystems that rely on VFS provides posix_acl_{access,default}_xattr_handler. The posix_acl_xattr_set() handler which is used for the ->set() method of posix_acl_{access,default}_xattr_handler uses posix_acl_from_xattr() to translate from the uapi POSIX ACL format to the VFS format so that it can be passed to the i_op->set_acl() handler of the filesystem or for direct caching in case no i_op->set_acl() handler is defined. During setxattr() the {g,u}id values stored as ACL_{GROUP,USER} entries in the uapi POSIX ACL format aren't raw {g,u}id values that need to be mapped according to the filesystem's idmapping. Instead they are {g,u}id values in the caller's idmapping which have been generated during posix_acl_fix_xattr_from_user(). In other words, they are k{g,u}id_t which are passed as raw {g,u}id values abusing the uapi POSIX ACL format (Please note that this type safety violation has existed since the introduction of k{g,u}id_t. Please see [1] for more details.). So when posix_acl_from_xattr() is called in posix_acl_xattr_set() the filesystem idmapping is completely irrelevant. Instead, we abuse the initial idmapping to recover the k{g,u}id_t base on the value stored in raw {g,u}id as ACL_{GROUP,USER} in the uapi POSIX ACL format. We need to clearly distinguish betweeen these two operations as it is really easy to confuse for filesystems as can be seen in ntfs3. In order to do this we factor out make_posix_acl() which takes callbacks allowing callers to pass dedicated methods to generate the correct k{g,u}id_t. This is just an internal static helper which is not exposed to any filesystems but it neatly encapsulates the basic logic of walking through a uapi POSIX ACL and returning an allocated VFS POSIX ACL with the correct k{g,u}id_t values. The posix_acl_from_xattr() helper can then be implemented as a simple call to make_posix_acl() with callbacks that generate the correct k{g,u}id_t from the raw {g,u}id values in ACL_{GROUP,USER} entries in the uapi POSIX ACL format as read from the backing store. For setxattr() we add a new helper vfs_set_acl_prepare() which has callbacks to map the POSIX ACLs from the uapi format with the k{g,u}id_t values stored in raw {g,u}id format in ACL_{GROUP,USER} entries into the correct k{g,u}id_t values in the filesystem idmapping. In contrast to posix_acl_from_xattr() the vfs_set_acl_prepare() helper needs to take the mount idmapping into account. The differences are explained in more detail in the kernel doc for the new functions. In follow up patches we will remove all abuses of posix_acl_from_xattr() for setxattr() operations and replace it with calls to vfs_set_acl_prepare(). The new vfs_set_acl_prepare() helper allows us to deal with the ambiguity in how the POSI ACL uapi struct stores {g,u}id values depending on whether this is a getxattr() or setxattr() operation. This also allows us to remove the posix_acl_setxattr_idmapped_mnt() helper reducing the abuse of the POSIX ACL uapi format to pass values that should be distinct types in {g,u}id values stored as ACL_{GROUP,USER} entries. The removal of posix_acl_setxattr_idmapped_mnt() in turn allows us to re-constify the value parameter of vfs_setxattr() which in turn allows us to avoid the nasty cast from a const void pointer to a non-const void pointer on ovl_do_setxattr(). Ultimately, the plan is to get rid of the type violations completely and never pass the values from k{g,u}id_t as raw {g,u}id in ACL_{GROUP,USER} entries in uapi POSIX ACL format. But that's a longer way to go and this is a preparatory step. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Co-Developed-by: Seth Forshee Signed-off-by: Christian Brauner (Microsoft) --- include/linux/posix_acl_xattr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index b6bd3eac2bcc..47eca15fd842 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -66,6 +66,9 @@ struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); +struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const void *value, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; -- cgit v1.2.3 From d8f3f5834febb74d18b5b2098f67d9db740f3e30 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Aug 2022 10:36:32 -0700 Subject: rcu: Update rcu_access_pointer() header for rcu_dereference_protected() The rcu_access_pointer() docbook header correctly notes that it may be used during post-grace-period teardown. However, it is usually better to use rcu_dereference_protected() for this purpose. This commit therefore calls out this preferred usage. Reported-by: Maxim Mikityanskiy Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f527f27e6438..61a1a85c720c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -496,13 +496,21 @@ do { \ * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. + * Within an RCU read-side critical section, there is little reason to + * use rcu_access_pointer(). + * + * It is usually best to test the rcu_access_pointer() return value + * directly in order to avoid accidental dereferences being introduced + * by later inattentive changes. In other words, assigning the + * rcu_access_pointer() return value to a local variable results in an + * accident waiting to happen. * * It is also permissible to use rcu_access_pointer() when read-side - * access to the pointer was removed at least one grace period ago, as - * is the case in the context of the RCU callback that is freeing up - * the data, or after a synchronize_rcu() returns. This can be useful - * when tearing down multi-linked structures after a grace period - * has elapsed. + * access to the pointer was removed at least one grace period ago, as is + * the case in the context of the RCU callback that is freeing up the data, + * or after a synchronize_rcu() returns. This can be useful when tearing + * down multi-linked structures after a grace period has elapsed. However, + * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) -- cgit v1.2.3 From 91a967fd6934abc0c7e4b0d26728e38674278707 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jul 2022 15:37:05 -0700 Subject: rcu: Add full-sized polling for get_completed*() and poll_state*() The get_completed_synchronize_rcu() and poll_state_synchronize_rcu() APIs compress the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds the first members of the full-state RCU grace-period polling API, namely the get_completed_synchronize_rcu_full() and poll_state_synchronize_rcu_full() functions. These use up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but which are guaranteed not to miss grace periods, at least in situations where the single-CPU grace-period optimization does not apply. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ include/linux/rcutiny.h | 9 +++++++++ include/linux/rcutree.h | 8 ++++++++ 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f527f27e6438..faaa174dfb27 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -42,7 +42,10 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); + +struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); #ifdef CONFIG_PREEMPT_RCU diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 62815c0a2dce..1fbff8600d92 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,10 +14,19 @@ #include /* for HZ */ +struct rcu_gp_oldstate { + unsigned long rgos_norm; +}; + unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); +static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + return poll_state_synchronize_rcu(rgosp->rgos_norm); +} + static inline void cond_synchronize_rcu(unsigned long oldstate) { might_sleep(); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 47eaa4cb0df7..4ccbc3aa9dc2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,11 +40,19 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); + +struct rcu_gp_oldstate { + unsigned long rgos_norm; + unsigned long rgos_exp; + unsigned long rgos_polled; +}; + unsigned long start_poll_synchronize_rcu_expedited(void); void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); bool rcu_is_idle_cpu(int cpu); -- cgit v1.2.3 From 3fdefca9b42c8bebe3beea5c1a067c9718ca0fc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jul 2022 19:58:13 -0700 Subject: rcu: Add full-sized polling for get_state() The get_state_synchronize_rcu() API compresses the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds the next member of the full-state RCU grace-period polling API, namely the get_state_synchronize_rcu_full() function. This uses up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but is guaranteed not to miss grace periods. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 6 ++++++ include/linux/rcutree.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 1fbff8600d92..6e299955c4e9 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -19,6 +19,12 @@ struct rcu_gp_oldstate { }; unsigned long get_state_synchronize_rcu(void); + +static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = get_state_synchronize_rcu(); +} + unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 4ccbc3aa9dc2..7b769f1b417a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -50,6 +50,7 @@ struct rcu_gp_oldstate { unsigned long start_poll_synchronize_rcu_expedited(void); void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -- cgit v1.2.3 From 76ea364161e72b1878126edf8d507d2a62fb47b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 2 Aug 2022 17:04:54 -0700 Subject: rcu: Add full-sized polling for start_poll() The start_poll_synchronize_rcu() API compresses the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds the next member of the full-state RCU grace-period polling API, namely the start_poll_synchronize_rcu_full() function. This uses up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but is guaranteed not to miss grace periods. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 6 ++++++ include/linux/rcutree.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6e299955c4e9..6bc30e46a819 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -26,6 +26,12 @@ static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) } unsigned long start_poll_synchronize_rcu(void); + +static inline void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu(); +} + bool poll_state_synchronize_rcu(unsigned long oldstate); static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 7b769f1b417a..8f2e0f0b26f6 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -52,6 +52,7 @@ void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); -- cgit v1.2.3 From 6c502b14ba66da0670a59e20354469fa56eab26c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Aug 2022 12:38:51 -0700 Subject: rcu: Add full-sized polling for start_poll_expedited() The start_poll_synchronize_rcu_expedited() API compresses the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds yet another member of the full-state RCU grace-period polling API, which is the start_poll_synchronize_rcu_expedited_full() function. This uses up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but is guaranteed not to miss grace periods. [ paulmck: Apply feedback from kernel test robot and Julia Lawall. ] Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6bc30e46a819..653e35777a99 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -49,6 +49,11 @@ static inline unsigned long start_poll_synchronize_rcu_expedited(void) return start_poll_synchronize_rcu(); } +static inline void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu_expedited(); +} + static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) { cond_synchronize_rcu(oldstate); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 8f2e0f0b26f6..7151fd861736 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,6 +48,7 @@ struct rcu_gp_oldstate { }; unsigned long start_poll_synchronize_rcu_expedited(void); +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); unsigned long get_state_synchronize_rcu(void); void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -- cgit v1.2.3 From b6fe4917ae4353b397079902cb024ae01f20dfb2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2022 13:46:05 -0700 Subject: rcu: Add full-sized polling for cond_sync_full() The cond_synchronize_rcu() API compresses the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds yet another member of the full-state RCU grace-period polling API, which is the cond_synchronize_rcu_full() function. This uses up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but is guaranteed not to miss grace periods. [ paulmck: Apply feedback from kernel test robot and Julia Lawall. ] Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 653e35777a99..3bee97f76bf4 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -44,6 +44,11 @@ static inline void cond_synchronize_rcu(unsigned long oldstate) might_sleep(); } +static inline void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu(rgosp->rgos_norm); +} + static inline unsigned long start_poll_synchronize_rcu_expedited(void) { return start_poll_synchronize_rcu(); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 7151fd861736..1b44288c027d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -57,6 +57,7 @@ void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool rcu_is_idle_cpu(int cpu); -- cgit v1.2.3 From 8df13f01608ea48712956c0b1afce35bdba5a1c5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2022 15:23:26 -0700 Subject: rcu: Add full-sized polling for cond_sync_exp_full() The cond_synchronize_rcu_expedited() API compresses the combined expedited and normal grace-period states into a single unsigned long, which conserves storage, but can miss grace periods in certain cases involving overlapping normal and expedited grace periods. Missing the occasional grace period is usually not a problem, but there are use cases that care about each and every grace period. This commit therefore adds yet another member of the full-state RCU grace-period polling API, which is the cond_synchronize_rcu_exp_full() function. This uses up to three times the storage (rcu_gp_oldstate structure instead of unsigned long), but is guaranteed not to miss grace periods. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3bee97f76bf4..4405e9112cee 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,6 +64,11 @@ static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) cond_synchronize_rcu(oldstate); } +static inline void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu_expedited(rgosp->rgos_norm); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 1b44288c027d..755b082f4ec6 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -50,6 +50,7 @@ struct rcu_gp_oldstate { unsigned long start_poll_synchronize_rcu_expedited(void); void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); unsigned long get_state_synchronize_rcu(void); void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); -- cgit v1.2.3 From 7ecef0871dd9a879038dbe8a681ab48bd0c92988 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2022 17:54:53 -0700 Subject: rcu: Remove ->rgos_polled field from rcu_gp_oldstate structure Because both normal and expedite