From f9c82a4ea89c384d49ce03768ba88d049ed3f1f0 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:08 +0200
Subject: Increase size of ucounts to atomic_long_t

RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK use unsigned long to store their
counters. As a preparation for moving rlimits based on ucounts, we need
to increase the size of the variable to long.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/257aa5fb1a7d81cf0f4c34f39ada2320c4284771.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f6c5f784be5a..c242c10906c5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -88,7 +88,7 @@ struct user_namespace {
 	struct ctl_table_header *sysctls;
 #endif
 	struct ucounts		*ucounts;
-	int ucount_max[UCOUNT_COUNTS];
+	long ucount_max[UCOUNT_COUNTS];
 } __randomize_layout;
 
 struct ucounts {
@@ -96,7 +96,7 @@ struct ucounts {
 	struct user_namespace *ns;
 	kuid_t uid;
 	int count;
-	atomic_t ucount[UCOUNT_COUNTS];
+	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
 extern struct user_namespace init_user_ns;
-- 
cgit v1.2.3


From 905ae01c4ae2ae3df05bb141801b1db4b7d83c61 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:09 +0200
Subject: Add a reference to ucounts for each cred

For RLIMIT_NPROC and some other rlimits the user_struct that holds the
global limit is kept alive for the lifetime of a process by keeping it
in struct cred. Adding a pointer to ucounts in the struct cred will
allow to track RLIMIT_NPROC not only for user in the system, but for
user in the user_namespace.

Updating ucounts may require memory allocation which may fail. So, we
cannot change cred.ucounts in the commit_creds() because this function
cannot fail and it should always return 0. For this reason, we modify
cred.ucounts before calling the commit_creds().

Changelog

v6:
* Fix null-ptr-deref in is_ucounts_overlimit() detected by trinity. This
  error was caused by the fact that cred_alloc_blank() left the ucounts
  pointer empty.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/b37aaef28d8b9b0d757e07ba6dd27281bbe39259.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/cred.h           | 2 ++
 include/linux/user_namespace.h | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4c6350503697..66436e655032 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -144,6 +144,7 @@ struct cred {
 #endif
 	struct user_struct *user;	/* real user ID subscription */
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
+	struct ucounts *ucounts;
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	/* RCU deletion */
 	union {
@@ -170,6 +171,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
 extern int set_create_files_as(struct cred *, struct inode *);
 extern int cred_fscmp(const struct cred *, const struct cred *);
 extern void __init cred_init(void);
+extern int set_cred_ucounts(struct cred *);
 
 /*
  * check for validity of credentials
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index c242c10906c5..7919b80d57ed 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -100,11 +100,15 @@ struct ucounts {
 };
 
 extern struct user_namespace init_user_ns;
+extern struct ucounts init_ucounts;
 
 bool setup_userns_sysctls(struct user_namespace *ns);
 void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
+struct ucounts *get_ucounts(struct ucounts *ucounts);
+void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
 
-- 
cgit v1.2.3


From b6c336528926ef73b0f70260f2636de2c3b94c14 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:10 +0200
Subject: Use atomic_t for ucounts reference counting

The current implementation of the ucounts reference counter requires the
use of spin_lock. We're going to use get_ucounts() in more performance
critical areas like a handling of RLIMIT_SIGPENDING.

Now we need to use spin_lock only if we want to change the hashtable.

v10:
* Always try to put ucounts in case we cannot increase ucounts->count.
  This will allow to cover the case when all consumers will return
  ucounts at once.

v9:
* Use a negative value to check that the ucounts->count is close to
  overflow.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/94d1dbecab060a6b116b0a2d1accd8ca1bbb4f5f.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7919b80d57ed..80b5bf12feae 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -95,7 +95,7 @@ struct ucounts {
 	struct hlist_node node;
 	struct user_namespace *ns;
 	kuid_t uid;
-	int count;
+	atomic_t count;
 	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
@@ -107,7 +107,7 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
-struct ucounts *get_ucounts(struct ucounts *ucounts);
+struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
-- 
cgit v1.2.3


From 21d1c5e386bc751f1953b371d72cd5b7d9c9e270 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:11 +0200
Subject: Reimplement RLIMIT_NPROC on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

To illustrate the impact of rlimits, let's say there is a program that
does not fork. Some service-A wants to run this program as user X in
multiple containers. Since the program never fork the service wants to
set RLIMIT_NPROC=1.

service-A
 \- program (uid=1000, container1, rlimit_nproc=1)
 \- program (uid=1000, container2, rlimit_nproc=1)

The service-A sets RLIMIT_NPROC=1 and runs the program in container1.
When the service-A tries to run a program with RLIMIT_NPROC=1 in
container2 it fails since user X already has one running process.

We cannot use existing inc_ucounts / dec_ucounts because they do not
allow us to exceed the maximum for the counter. Some rlimits can be
overlimited by root or if the user has the appropriate capability.

Changelog

v11:
* Change inc_rlimit_ucounts() which now returns top value of ucounts.
* Drop inc_rlimit_ucounts_and_test() because the return code of
  inc_rlimit_ucounts() can be checked.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/c5286a8aa16d2d698c222f7532f3d735c82bc6bc.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/cred.h           |  2 ++
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h | 12 ++++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 66436e655032..5ca1e8a1d035 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -372,6 +372,7 @@ static inline void put_cred(const struct cred *_cred)
 
 #define task_uid(task)		(task_cred_xxx((task), uid))
 #define task_euid(task)		(task_cred_xxx((task), euid))
+#define task_ucounts(task)	(task_cred_xxx((task), ucounts))
 
 #define current_cred_xxx(xxx)			\
 ({						\
@@ -388,6 +389,7 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
+#define current_ucounts()	(current_cred_xxx(ucounts))
 
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index a8ec3b6093fc..d33d867ad6c1 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 80b5bf12feae..4a97acc35990 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,12 @@ enum ucount_type {
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
 #endif
+	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_COUNTS,
 };
 
+#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -110,6 +113,15 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
 struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
+static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
+{
+	return atomic_long_read(&ucounts->ucount[type]);
+}
+
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
-- 
cgit v1.2.3


From 6e52a9f0532f912af37bab4caf18b57d1b9845f4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:12 +0200
Subject: Reimplement RLIMIT_MSGQUEUE on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/2531f42f7884bbfee56a978040b3e0d25cdf6cde.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/user.h     | 4 ----
 include/linux/user_namespace.h | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index d33d867ad6c1..8a34446681aa 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,10 +18,6 @@ struct user_struct {
 #endif
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
-#endif
-#ifdef CONFIG_POSIX_MQUEUE
-	/* protected by mq_lock	*/
-	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4a97acc35990..5eeb86b00e68 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -51,6 +51,7 @@ enum ucount_type {
 	UCOUNT_INOTIFY_WATCHES,
 #endif
 	UCOUNT_RLIMIT_NPROC,
+	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_COUNTS,
 };
 
-- 
cgit v1.2.3


From d64696905554e919321e31afc210606653b8f6a4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:13 +0200
Subject: Reimplement RLIMIT_SIGPENDING on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Changelog

v11:
* Revert most of changes to fix performance issues.

v10:
* Fix memory leak on get_ucounts failure.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/df9d7764dddd50f28616b7840de74ec0f81711a8.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/user.h     | 1 -
 include/linux/signal_types.h   | 4 +++-
 include/linux/user_namespace.h | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8a34446681aa..8ba9cec4fb99 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
 #endif
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 68e06c75c5b2..34cb28b8f16c 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -13,6 +13,8 @@ typedef struct kernel_siginfo {
 	__SIGINFO;
 } kernel_siginfo_t;
 
+struct ucounts;
+
 /*
  * Real Time signals may be queued.
  */
@@ -21,7 +23,7 @@ struct sigqueue {
 	struct list_head list;
 	int flags;
 	kernel_siginfo_t info;
-	struct user_struct *user;
+	struct ucounts *ucounts;
 };
 
 /* flags values. */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 5eeb86b00e68..58f417986472 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -52,6 +52,7 @@ enum ucount_type {
 #endif
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
+	UCOUNT_RLIMIT_SIGPENDING,
 	UCOUNT_COUNTS,
 };
 
-- 
cgit v1.2.3


From d7c9e99aee48e6bc0b427f3e3c658a6aba15001e Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:14 +0200
Subject: Reimplement RLIMIT_MEMLOCK on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Changelog

v11:
* Fix issue found by lkp robot.

v8:
* Fix issues found by lkp-tests project.

v7:
* Keep only ucounts for RLIMIT_MEMLOCK checks instead of struct cred.

v6:
* Fix bug in hugetlb_file_setup() detected by trinity.

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/970d50c70c71bfd4496e0e8d2a0a32feebebb350.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/hugetlb.h        | 4 ++--
 include/linux/mm.h             | 4 ++--
 include/linux/sched/user.h     | 1 -
 include/linux/shmem_fs.h       | 2 +-
 include/linux/user_namespace.h | 1 +
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cccd1aab69dd..96d63dbdec65 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -434,7 +434,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
-				struct user_struct **user, int creat_flags,
+				struct ucounts **ucounts, int creat_flags,
 				int page_size_log);
 
 static inline bool is_file_hugepages(struct file *file)
@@ -454,7 +454,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
 #define is_file_hugepages(file)			false
 static inline struct file *
 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
-		struct user_struct **user, int creat_flags,
+		struct ucounts **ucounts, int creat_flags,
 		int page_size_log)
 {
 	return ERR_PTR(-ENOSYS);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ba434287387..3b4e24738ce4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1670,8 +1670,8 @@ extern bool can_do_mlock(void);
 #else
 static inline bool can_do_mlock(void) { return false; }
 #endif
-extern int user_shm_lock(size_t, struct user_struct *);
-extern void user_shm_unlock(size_t, struct user_struct *);
+extern int user_shm_lock(size_t, struct ucounts *);
+extern void user_shm_unlock(size_t, struct ucounts *);
 
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8ba9cec4fb99..82bd2532da6b 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,7 +18,6 @@ struct user_struct {
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
 #endif
-	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index d82b6f396588..aa77dcd1646f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
-extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
 #ifdef CONFIG_SHMEM
 extern const struct address_space_operations shmem_aops;
 static inline bool shmem_mapping(struct address_space *mapping)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 58f417986472..2a3177b9b8bf 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -53,6 +53,7 @@ enum ucount_type {
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_RLIMIT_SIGPENDING,
+	UCOUNT_RLIMIT_MEMLOCK,
 	UCOUNT_COUNTS,
 };
 
-- 
cgit v1.2.3


From c1ada3dc7219b02b3467aa906c2f5f8b098578d1 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:16 +0200
Subject: ucounts: Set ucount_max to the largest positive value the type can
 hold

The ns->ucount_max[] is signed long which is less than the rlimit size.
We have to protect ucount_max[] from overflow and only use the largest
value that we can hold.

On 32bit using "long" instead of "unsigned long" to hold the counts has
the downside that RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK are limited to 2GiB
instead of 4GiB. I don't think anyone cares but it should be mentioned
in case someone does.

The RLIMIT_NPROC and RLIMIT_SIGPENDING used atomic_t so their maximum
hasn't changed.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/1825a5dfa18bc5a570e79feb05e2bd07fd57e7e3.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 2a3177b9b8bf..61794ae32fa8 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -125,6 +125,12 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 
+static inline void set_rlimit_ucount_max(struct user_namespace *ns,
+		enum ucount_type type, unsigned long max)
+{
+	ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
+}
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
-- 
cgit v1.2.3


From 6be388f4a35d2ce5ef7dbf635a8964a5da7f799f Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Sun, 25 Apr 2021 23:03:53 +0530
Subject: HID: usbhid: fix info leak in hid_submit_ctrl

In hid_submit_ctrl(), the way of calculating the report length doesn't
take into account that report->size can be zero. When running the
syzkaller reproducer, a report of size 0 causes hid_submit_ctrl) to
calculate transfer_buffer_length as 16384. When this urb is passed to
the usb core layer, KMSAN reports an info leak of 16384 bytes.

To fix this, first modify hid_report_len() to account for the zero
report size case by using DIV_ROUND_UP for the division. Then, call it
from hid_submit_ctrl().

Reported-by: syzbot+7c2bb71996f95a82524c@syzkaller.appspotmail.com
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 271021e20a3f..10e922cee4eb 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1167,8 +1167,7 @@ static inline void hid_hw_wait(struct hid_device *hdev)
  */
 static inline u32 hid_report_len(struct hid_report *report)
 {
-	/* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */
-	return ((report->size - 1) >> 3) + 1 + (report->id > 0);
+	return DIV_ROUND_UP(report->size, 8) + (report->id > 0);
 }
 
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
-- 
cgit v1.2.3


From 0e4768713e71dd224633fd7e00ad358bc48f433a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:31 +0300
Subject: spi: pxa2xx: Replace header inclusions by forward declarations

When the data structure is only referred by pointer, compiler may not need
to see the contents of the data type. Thus, we may replace header inclusions
by respective forward declarations. Due to above add missed headers as well.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/pxa2xx_spi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 31f00c7f4f59..1e0e2f136319 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -5,6 +5,8 @@
 #ifndef __linux_pxa2xx_spi_h
 #define __linux_pxa2xx_spi_h
 
+#include <linux/types.h>
+
 #include <linux/pxa2xx_ssp.h>
 
 #define PXA2XX_CS_ASSERT (0x01)
-- 
cgit v1.2.3


From 5edc24901f4d469f8fc943004f73655933e89dbf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:32 +0300
Subject: spi: pxa2xx: Unify ifdeffery used in the headers

The two headers have quite different ifdeffery to prevent multiple inclusion.
Unify them with the pattern that in particular reflects their location.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h     | 6 +++---
 include/linux/spi/pxa2xx_spi.h | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 7f73b26ed22e..14b049840faf 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -11,8 +11,8 @@
  *       PXA3xx     SSP1, SSP2, SSP3, SSP4
  */
 
-#ifndef __LINUX_SSP_H
-#define __LINUX_SSP_H
+#ifndef __LINUX_PXA2XX_SSP_H
+#define __LINUX_PXA2XX_SSP_H
 
 #include <linux/bits.h>
 #include <linux/compiler_types.h>
@@ -270,4 +270,4 @@ static inline struct ssp_device *pxa_ssp_request_of(const struct device_node *n,
 static inline void pxa_ssp_free(struct ssp_device *ssp) {}
 #endif
 
-#endif
+#endif	/* __LINUX_PXA2XX_SSP_H */
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 1e0e2f136319..12ef04d0896d 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -2,8 +2,8 @@
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
  */
-#ifndef __linux_pxa2xx_spi_h
-#define __linux_pxa2xx_spi_h
+#ifndef __LINUX_SPI_PXA2XX_SPI_H
+#define __LINUX_SPI_PXA2XX_SPI_H
 
 #include <linux/types.h>
 
@@ -51,4 +51,5 @@ struct pxa2xx_spi_chip {
 extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info);
 
 #endif
-#endif
+
+#endif	/* __LINUX_SPI_PXA2XX_SPI_H */
-- 
cgit v1.2.3


From 1beb37b0e3f98708bfb37778049764b4500756da Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:33 +0300
Subject: spi: pxa2xx: Group Intel Quark specific definitions

DDS_RATE is Intel Quark specific definition. Move it to the rest
Intel Quark related.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 14b049840faf..1b6c1a0922bd 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -38,7 +38,6 @@ struct device_node;
 #define SSDR		(0x10)  /* SSP Data Write/Data Read Register */
 
 #define SSTO		(0x28)  /* SSP Time Out Register */
-#define DDS_RATE	(0x28)  /* SSP DDS Clock Rate Register (Intel Quark) */
 #define SSPSP		(0x2C)  /* SSP Programmable Serial Protocol */
 #define SSTSA		(0x30)  /* SSP Tx Timeslot Active */
 #define SSRSA		(0x34)  /* SSP Rx Timeslot Active */
@@ -105,6 +104,9 @@ struct device_node;
 #define CE4100_SSCR1_RFT	GENMASK(11, 10)	/* Receive FIFO Threshold (mask) */
 #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..4] */
 
+/* Intel Quark X1000 */
+#define DDS_RATE		0x28		 /* SSP DDS Clock Rate Register */
+
 /* QUARK_X1000 SSCR0 bit definition */
 #define QUARK_X1000_SSCR0_DSS		GENMASK(4, 0)	/* Data Size Select (mask) */
 #define QUARK_X1000_SSCR0_DataSize(x)	((x) - 1)	/* Data Size Select [4..32] */
-- 
cgit v1.2.3


From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 5 May 2021 17:44:22 -0700
Subject: usb: typec: tcpm: Don't block probing of consumers of "connector"
 nodes

fw_devlink expects DT device nodes with "compatible" property to have
struct devices created for them. Since the connector node might not be
populated as a device, mark it as such so that fw_devlink knows not to
wait on this fwnode being populated as a struct device.

Without this patch, USB functionality can be broken on some boards.

Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fwnode.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index ed4e67a7ff1c..59828516ebaf 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void);
 extern bool fw_devlink_is_strict(void);
 int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup);
 void fwnode_links_purge(struct fwnode_handle *fwnode);
+void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode);
 
 #endif
-- 
cgit v1.2.3


From c745253e2a691a40c66790defe85c104a887e14a Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 5 May 2021 14:09:15 +0300
Subject: PM: runtime: Fix unpaired parent child_count for force_resume

As pm_runtime_need_not_resume() relies also on usage_count, it can return
a different value in pm_runtime_force_suspend() compared to when called in
pm_runtime_force_resume(). Different return values can happen if anything
calls PM runtime functions in between, and causes the parent child_count
to increase on every resume.

So far I've seen the issue only for omapdrm that does complicated things
with PM runtime calls during system suspend for legacy reasons:

omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
pm_runtime_force_suspend() for 58000000.dss, !pm_runtime_need_not_resume()
 __update_runtime_status()
system suspended
pm_runtime_force_resume() for 58000000.dss, pm_runtime_need_not_resume()
 pm_runtime_enable() only called because of pm_runtime_need_not_resume()
omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
...
rpm_suspend for 58000000.dss but parent child_count is now unbalanced

Let's fix the issue by adding a flag for needs_force_resume and use it in
pm_runtime_force_resume() instead of pm_runtime_need_not_resume().

Additionally omapdrm system suspend could be simplified later on to avoid
lots of unnecessary PM runtime calls and the complexity it adds. The
driver can just use internal functions that are shared between the PM
runtime and system suspend related functions.

Fixes: 4918e1f87c5f ("PM / runtime: Rework pm_runtime_force_suspend/resume()")
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Cc: 4.16+ <stable@vger.kernel.org> # 4.16+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index c9657408fee1..1d8209c09686 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -601,6 +601,7 @@ struct dev_pm_info {
 	unsigned int		idle_notification:1;
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
+	unsigned int		needs_force_resume:1;
 	unsigned int		runtime_auto:1;
 	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
-- 
cgit v1.2.3


From 2515dd6ce8e545b0b2eece84920048ef9ed846c4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 19 Apr 2021 16:17:41 -0700
Subject: stack: Replace "o" output with "r" input constraint

"o" isn't a common asm() constraint to use; it triggers an assertion in
assert-enabled builds of LLVM that it's not recognized when targeting
aarch64 (though it appears to fall back to "m"). It's fixed in LLVM 13 now,
but there isn't really a good reason to use "o" in particular here. To
avoid causing build issues for those using assert-enabled builds of earlier
LLVM versions, the constraint needs changing.

Instead, if the point is to retain the __builtin_alloca(), make ptr appear
to "escape" via being an input to an empty inline asm block. This is
preferable anyways, since otherwise this looks like a dead store.

While the use of "r" was considered in

  https://lore.kernel.org/lkml/202104011447.2E7F543@keescook/

it was only tested as an output (which looks like a dead store, and wasn't
sufficient).

Use "r" as an input constraint instead, which behaves correctly across
compilers and architectures.

Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://reviews.llvm.org/D100412
Link: https://bugs.llvm.org/show_bug.cgi?id=49956
Link: https://lore.kernel.org/r/20210419231741.4084415-1-keescook@chromium.org
---
 include/linux/randomize_kstack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index fd80fab663a9..bebc911161b6 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size);
 		u32 offset = raw_cpu_read(kstack_offset);		\
 		u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
-		asm volatile("" : "=o"(*ptr) :: "memory");		\
+		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
 } while (0)
 
-- 
cgit v1.2.3


From 0c8ccd8b267fc735e4621774ce62728f27d42863 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:29 +0300
Subject: spi: pxa2xx: Use pxa_ssp_enable()/pxa_ssp_disable() in the driver

There are few places that repeat the logic of pxa_ssp_enable() and
pxa_ssp_disable(). Use them instead of open coded variants.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-10-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 1b6c1a0922bd..fdfbe17e15f4 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -254,6 +254,22 @@ static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
 	return __raw_readl(dev->mmio_base + reg);
 }
 
+static inline void pxa_ssp_enable(struct ssp_device *ssp)
+{
+	u32 sscr0;
+
+	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) | SSCR0_SSE;
+	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
+}
+
+static inline void pxa_ssp_disable(struct ssp_device *ssp)
+{
+	u32 sscr0;
+
+	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) & ~SSCR0_SSE;
+	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
+}
+
 #if IS_ENABLED(CONFIG_PXA_SSP)
 struct ssp_device *pxa_ssp_request(int port, const char *label);
 void pxa_ssp_free(struct ssp_device *);
-- 
cgit v1.2.3


From 3fdb59cf10b020b32b9f1dfc78611320623dcb3e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:34 +0300
Subject: spi: pxa2xx: Introduce special type for Merrifield SPIs

Intel Merrifield SPI is actually more closer to PXA3xx. It has extended FIFO
(32 bytes) and additional registers to get or set FIFO thresholds.

Introduce new type for Intel Merrifield SPI host controllers and handle bigger
FIFO size.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-15-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index fdfbe17e15f4..2b21bc1f3c73 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -183,6 +183,21 @@ struct device_node;
 #define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
 #define SSACD_SCDX8		BIT(7)		/* SYSCLK division ratio select */
 
+/* Intel Merrifield SSP */
+#define SFIFOL			0x68		/* FIFO level */
+#define SFIFOTT			0x6c		/* FIFO trigger threshold */
+
+#define RX_THRESH_MRFLD_DFLT	16
+#define TX_THRESH_MRFLD_DFLT	16
+
+#define SFIFOL_TFL_MASK		GENMASK(15, 0)	/* Transmit FIFO Level mask */
+#define SFIFOL_RFL_MASK		GENMASK(31, 16)	/* Receive FIFO Level mask */
+
+#define SFIFOTT_TFT		GENMASK(15, 0)	/* Transmit FIFO Threshold (mask) */
+#define SFIFOTT_TxThresh(x)	(((x) - 1) << 0)	/* TX FIFO trigger threshold / level */
+#define SFIFOTT_RFT		GENMASK(31, 16)	/* Receive FIFO Threshold (mask) */
+#define SFIFOTT_RxThresh(x)	(((x) - 1) << 16)	/* RX FIFO trigger threshold / level */
+
 /* LPSS SSP */
 #define SSITF			0x44		/* TX FIFO trigger level */
 #define SSITF_TxHiThresh(x)	(((x) - 1) << 0)
@@ -205,6 +220,7 @@ enum pxa_ssp_type {
 	MMP2_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
+	MRFLD_SSP,
 	QUARK_X1000_SSP,
 	LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */
 	LPSS_BYT_SSP,
-- 
cgit v1.2.3


From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:42 +0300
Subject: spi: Switch to signed types for *_native_cs SPI controller fields

While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid
undefined behaviour when counting unused native CSs") missed the case
when all CSs are GPIOs and thus unused_native_cs will be evaluated to
-1 in unsigned representation. This will falsely trigger a condition
in the spi_get_gpio_descs().

Switch to signed types for *_native_cs SPI controller fields to fix above.

Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 360a3bc767ca..74239d65c7fd 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -644,8 +644,8 @@ struct spi_controller {
 	int			*cs_gpios;
 	struct gpio_desc	**cs_gpiods;
 	bool			use_gpio_descriptors;
-	u8			unused_native_cs;
-	u8			max_native_cs;
+	s8			unused_native_cs;
+	s8			max_native_cs;
 
 	/* statistics */
 	struct spi_statistics	statistics;
-- 
cgit v1.2.3


From efed9a3337e341bd0989161b97453b52567bc59d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 10 May 2021 17:05:35 -0700
Subject: kyber: fix out of bounds access when preempted

__blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and
passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx
for the current CPU again and uses that to get the corresponding Kyber
context in the passed hctx. However, the thread may be preempted between
the two calls to blk_mq_get_ctx(), and the ctx returned the second time
may no longer correspond to the passed hctx. This "works" accidentally
most of the time, but it can cause us to read garbage if the second ctx
came from an hctx with more ctx's than the first one (i.e., if
ctx->index_hw[hctx->type] > hctx->nr_ctx).

This manifested as this UBSAN array index out of bounds error reported
by Jakub:

UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9
index 13106 is out of range for type 'long unsigned int [128]'
Call Trace:
 dump_stack+0xa4/0xe5
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34
 queued_spin_lock_slowpath+0x476/0x480
 do_raw_spin_lock+0x1c2/0x1d0
 kyber_bio_merge+0x112/0x180
 blk_mq_submit_bio+0x1f5/0x1100
 submit_bio_noacct+0x7b0/0x870
 submit_bio+0xc2/0x3a0
 btrfs_map_bio+0x4f0/0x9d0
 btrfs_submit_data_bio+0x24e/0x310
 submit_one_bio+0x7f/0xb0
 submit_extent_page+0xc4/0x440
 __extent_writepage_io+0x2b8/0x5e0
 __extent_writepage+0x28d/0x6e0
 extent_write_cache_pages+0x4d7/0x7a0
 extent_writepages+0xa2/0x110
 do_writepages+0x8f/0x180
 __writeback_single_inode+0x99/0x7f0
 writeback_sb_inodes+0x34e/0x790
 __writeback_inodes_wb+0x9e/0x120
 wb_writeback+0x4d2/0x660
 wb_workfn+0x64d/0xa10
 process_one_work+0x53a/0xa80
 worker_thread+0x69/0x5b0
 kthread+0x20b/0x240
 ret_from_fork+0x1f/0x30

Only Kyber uses the hctx, so fix it by passing the request_queue to
->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can
map the queues itself to avoid the mismatch.

Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/elevator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1fe8e105b83b..dcb2f9022c1d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
 	void (*depth_updated)(struct blk_mq_hw_ctx *);
 
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
+	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-- 
cgit v1.2.3


From c5895d3f06cbb80ccb311f1dcb37074651030cb6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:42 +0200
Subject: sched: Simplify sched_info_on()

The situation around sched_info is somewhat complicated, it is used by
sched_stats and delayacct and, indirectly, kvm.

If SCHEDSTATS=Y (but disabled by default) sched_info_on() is
unconditionally true -- this is the case for all distro kernel configs
I checked.

If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then
sched_info_on() can return false when delayacct is disabled,
presumably because there would be no other users left; except kvm is.

Instead of complicating matters further by accurately accounting
sched_stat and kvm state, simply unconditionally enable when
SCHED_INFO=Y, matching the common distro case.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210505111525.121458839@infradead.org
---
 include/linux/sched/stat.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 568286411b43..939c3ec9e1b9 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -3,6 +3,7 @@
 #define _LINUX_SCHED_STAT_H
 
 #include <linux/percpu.h>
+#include <linux/kconfig.h>
 
 /*
  * Various counters maintained by the scheduler and fork(),
@@ -23,14 +24,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
 {
-#ifdef CONFIG_SCHEDSTATS
-	return 1;
-#elif defined(CONFIG_TASK_DELAY_ACCT)
-	extern int delayacct_on;
-	return delayacct_on;
-#else
-	return 0;
-#endif
+	return IS_ENABLED(CONFIG_SCHED_INFO);
 }
 
 #ifdef CONFIG_SCHEDSTATS
-- 
cgit v1.2.3


From eee4d9fee2544389e5ce5697ed92db67c86d7a9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:36 +0200
Subject: delayacct: Add static_branch in scheduler hooks

Cheaper when delayacct is disabled.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210505111525.248028369@infradead.org
---
 include/linux/delayacct.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 21651f946751..57fefa54b53a 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -58,8 +58,10 @@ struct task_delay_info {
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/jump_label.h>
 
 #ifdef CONFIG_TASK_DELAY_ACCT
+DECLARE_STATIC_KEY_TRUE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
@@ -114,6 +116,9 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
+	if (!static_branch_likely(&delayacct_key))
+		return;
+
 	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
 	if (current->delays)
 		__delayacct_blkio_start();
@@ -121,6 +126,9 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
+	if (!static_branch_likely(&delayacct_key))
+		return;
+
 	if (p->delays)
 		__delayacct_blkio_end(p);
 	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
-- 
cgit v1.2.3


From e4042ad492357fa995921376462b04a025dd53b6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:32 +0200
Subject: delayacct: Default disabled

Assuming this stuff isn't actually used much; disable it by default
and avoid allocating and tracking the task_delay_info structure.

taskstats is changed to still report the regular sched and sched_info
and only skip the missing task_delay_info fields instead of not
reporting anything.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20210505111525.308018373@infradead.org
---
 include/linux/delayacct.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 57fefa54b53a..225c8e01a111 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -61,7 +61,7 @@ struct task_delay_info {
 #include <linux/jump_label.h>
 
 #ifdef CONFIG_TASK_DELAY_ACCT
-DECLARE_STATIC_KEY_TRUE(delayacct_key);
+DECLARE_STATIC_KEY_FALSE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
@@ -69,7 +69,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(struct task_struct *);
-extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
+extern int delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
@@ -116,7 +116,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
-	if (!static_branch_likely(&delayacct_key))
+	if (!static_branch_unlikely(&delayacct_key))
 		return;
 
 	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
@@ -126,7 +126,7 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
-	if (!static_branch_likely(&delayacct_key))
+	if (!static_branch_unlikely(&delayacct_key))
 		return;
 
 	if (p->delays)
@@ -134,14 +134,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
 	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
 }
 
-static inline int delayacct_add_tsk(struct taskstats *d,
-					struct task_struct *tsk)
-{
-	if (!delayacct_on || !tsk->delays)
-		return 0;
-	return __delayacct_add_tsk(d, tsk);
-}
-
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 {
 	if (tsk->delays)
-- 
cgit v1.2.3


From 0cd7c741f01de13dc1eecf22557593b3514639bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 10 May 2021 14:01:00 +0200
Subject: delayacct: Add sysctl to enable at runtime

Just like sched_schedstats, allow runtime enabling (and disabling) of
delayacct. This is useful if one forgot to add the delayacct boot time
option.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/YJkhebGJAywaZowX@hirez.programming.kicks-ass.net
---
 include/linux/delayacct.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 225c8e01a111..af7e6eb50283 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -65,6 +65,10 @@ DECLARE_STATIC_KEY_FALSE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
+
+extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+			    size_t *lenp, loff_t *ppos);
+
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
-- 
cgit v1.2.3


From 8a311c740b53324ec584e0e3bb7077d56b123c28 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:36 -0500
Subject: sched: Basic tracking of matching tasks

Introduce task_struct::core_cookie as an opaque identifier for core
scheduling. When enabled; core scheduling will only allow matching
task to be on the core; where idle matches everything.

When task_struct::core_cookie is set (and core scheduling is enabled)
these tasks are indexed in a second RB-tree, first on cookie value
then on scheduling function, such that matching task selection always
finds the most elegible match.

NOTE: *shudder* at the overhead...

NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative
is per class tracking of cookies and that just duplicates a lot of
stuff for no raisin (the 2nd copy lives in the rt-mutex PI code).

[Joel: folded fixes]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.496975854@infradead.org
---
 include/linux/sched.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517..45eedccf86aa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -700,10 +700,16 @@ struct task_struct {
 	const struct sched_class	*sched_class;
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
+	struct sched_dl_entity		dl;
+
+#ifdef CONFIG_SCHED_CORE
+	struct rb_node			core_node;
+	unsigned long			core_cookie;
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group		*sched_task_group;
 #endif
-	struct sched_dl_entity		dl;
 
 #ifdef CONFIG_UCLAMP_TASK
 	/*
-- 
cgit v1.2.3


From d2dfa17bc7de67e99685c4d6557837bf801a102c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:43 -0500
Subject: sched: Trivial forced-newidle balancer

When a sibling is forced-idle to match the core-cookie; search for
matching tasks to fill the core.

rcu_read_unlock() can incur an infrequent deadlock in
sched_core_balance(). Fix this by using the RCU-sched flavor instead.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.800048269@infradead.org
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 45eedccf86aa..9b822e383212 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -705,6 +705,7 @@ struct task_struct {
 #ifdef CONFIG_SCHED_CORE
 	struct rb_node			core_node;
 	unsigned long			core_cookie;
+	unsigned int			core_occupation;
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED
-- 
cgit v1.2.3


From 6e33cad0af49336952e5541464bd02f5b5fd433e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 26 Mar 2021 18:55:06 +0100
Subject: sched: Trivial core scheduling cookie management

In order to not have to use pid_struct, create a new, smaller,
structure to manage task cookies for core scheduling.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.919768100@infradead.org
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9b822e383212..eab3f7c4251b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2179,4 +2179,10 @@ int sched_trace_rq_nr_running(struct rq *rq);
 
 const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
+#ifdef CONFIG_SCHED_CORE
+extern void sched_core_free(struct task_struct *tsk);
+#else
+static inline void sched_core_free(struct task_struct *tsk) { }
+#endif
+
 #endif
-- 
cgit v1.2.3


From 85dd3f61203c5cfa72b308ff327b5fbf3fc1ce5e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 29 Mar 2021 15:18:35 +0200
Subject: sched: Inherit task cookie on fork()

Note that sched_core_fork() is called from under tasklist_lock, and
not from sched_fork() earlier. This avoids a few races later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.980003687@infradead.org
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eab3f7c4251b..fba47e52e482 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2181,8 +2181,10 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
+extern void sched_core_fork(struct task_struct *p);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
+static inline void sched_core_fork(struct task_struct *p) { }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 7ac592aa35a684ff1858fb9ec282886b9e3575ac Mon Sep 17 00:00:00 2001
From: Chris Hyser <chris.hyser@oracle.com>
Date: Wed, 24 Mar 2021 17:40:15 -0400
Subject: sched: prctl() core-scheduling interface

This patch provides support for setting and copying core scheduling
'task cookies' between threads (PID), processes (TGID), and process
groups (PGID).

The value of core scheduling isn't that tasks don't share a core,
'nosmt' can do that. The value lies in exploiting all the sharing
opportunities that exist to recover possible lost performance and that
requires a degree of flexibility in the API.

From a security perspective (and there are others), the thread,
process and process group distinction is an existent hierarchal
categorization of tasks that reflects many of the security concerns
about 'data sharing'. For example, protecting against cache-snooping
by a thread that can just read the memory directly isn't all that
useful.

With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a
mechanism to create and share cookies. CREATE/SHARE_TO specify a
target pid with enum pidtype used to specify the scope of the targeted
tasks. For example, PIDTYPE_TGID will share the cookie with the
process and all of it's threads as typically desired in a security
scenario.

API:

  prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL)

where 'tgtpid/srcpid == 0' implies the current process and pidtype is
kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}.

For return values, EINVAL, ENOMEM are what they say. ESRCH means the
tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission
access to tgtpid/srcpid. ENODEV indicates your machines lacks SMT.

[peterz: complete rewrite]
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123309.039845339@infradead.org
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fba47e52e482..c7e7d50e2fdc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2182,6 +2182,8 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
+extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+				unsigned long uaddr);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
-- 
cgit v1.2.3


From fa5e5dc39669b4427830c546ede8709323b8276c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:58 +0200
Subject: jump_label, x86: Introduce jump_entry_size()

This allows architectures to have variable sized jumps.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.786777050@infradead.org
---
 include/linux/jump_label.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 05f5554d860f..8c45f58292ac 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -176,6 +176,15 @@ static inline void jump_entry_set_init(struct jump_entry *entry)
 	entry->key |= 2;
 }
 
+static inline int jump_entry_size(struct jump_entry *entry)
+{
+#ifdef JUMP_LABEL_NOP_SIZE
+	return JUMP_LABEL_NOP_SIZE;
+#else
+	return arch_jump_entry_size(entry);
+#endif
+}
+
 #endif
 #endif
 
-- 
cgit v1.2.3


From 5af0ea293d78c8b8f0b87ae2b13f7ac584057bc3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:00 +0200
Subject: jump_label: Free jump_entry::key bit1 for build use

Have jump_label_init() set jump_entry::key bit1 to either 0 ot 1
unconditionally. This makes it available for build-time games.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.906893264@infradead.org
---
 include/linux/jump_label.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8c45f58292ac..48b9b2a82767 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -171,9 +171,12 @@ static inline bool jump_entry_is_init(const struct jump_entry *entry)
 	return (unsigned long)entry->key & 2UL;
 }
 
-static inline void jump_entry_set_init(struct jump_entry *entry)
+static inline void jump_entry_set_init(struct jump_entry *entry, bool set)
 {
-	entry->key |= 2;
+	if (set)
+		entry->key |= 2;
+	else
+		entry->key &= ~2;
 }
 
 static inline int jump_entry_size(struct jump_entry *entry)
-- 
cgit v1.2.3


From 190515f610946db025cdedebde93958b725fb583 Mon Sep 17 00:00:00 2001
From: Lin Feng <linf@wangsu.com>
Date: Wed, 12 May 2021 18:01:24 +0800
Subject: blkdev.h: remove unused codes blk_account_rq

Last users of blk_account_rq gone with patch commit a1ce35fa49852db
("block: remove dead elevator code") and now it gets no caller, it can
be safely removed.

Signed-off-by: Lin Feng <linf@wangsu.com>
Link: https://lore.kernel.org/r/20210512100124.173769-1-linf@wangsu.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b91ba6207365..26c3e368656f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -677,11 +677,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
 
-static inline bool blk_account_rq(struct request *rq)
-{
-	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
-}
-
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
-- 
cgit v1.2.3


From cc00c1988801dc71f63bb7bad019e85046865095 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 12 May 2021 19:51:31 +0200
Subject: sched: Fix leftover comment typos

A few more snuck in. Also capitalize 'CPU' while at it.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched_clock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 528718e4ed52..835ee87ed792 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -14,7 +14,7 @@
  * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
  *			clocks.
  * @read_sched_clock:	Current clock source (or dummy source when suspended).
- * @mult:		Multipler for scaled math conversion.
+ * @mult:		Multiplier for scaled math conversion.
  * @shift:		Shift value for scaled math conversion.
  *
  * Care must be taken when updating this structure; it is read by
-- 
cgit v1.2.3


From 93d0955e6cf562d02aae37f5f8d98d9d9d16e0d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 12 May 2021 20:04:28 +0200
Subject: locking: Fix comment typos

A few snuck through.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 2ec9ff5a7fff..3e726ace5c62 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -52,7 +52,7 @@ enum lockdep_lock_type {
  * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
  * cached in the instance of lockdep_map
  *
- * Currently main class (subclass == 0) and signle depth subclass
+ * Currently main class (subclass == 0) and single depth subclass
  * are cached in lockdep_map. This optimization is mainly targeting
  * on rq->lock. double_rq_lock() acquires this highly competitive with
  * single depth.
-- 
cgit v1.2.3


From ca0760e7d79e2bb9c342e6b3f925b1ef01c6303e Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Thu, 6 May 2021 20:30:51 +0800
Subject: Compiler Attributes: Add continue in comment

Add "continue;" for switch/case block according to Doc[1]

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Signed-off-by: Miguel Ojeda <o