From 9fd4dcece43a53e5a9e65a973df5693702ee6401 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:13 +0100 Subject: debugfs: prevent access to possibly dead file_operations at file open Nothing prevents a dentry found by path lookup before a return of __debugfs_remove() to actually get opened after that return. Now, after the return of __debugfs_remove(), there are no guarantees whatsoever regarding the memory the corresponding inode's file_operations object had been kept in. Since __debugfs_remove() is seldomly invoked, usually from module exit handlers only, the race is hard to trigger and the impact is very low. A discussion of the problem outlined above as well as a suggested solution can be found in the (sub-)thread rooted at http://lkml.kernel.org/g/20130401203445.GA20862@ZenIV.linux.org.uk ("Yet another pipe related oops.") Basically, Greg KH suggests to introduce an intermediate fops and Al Viro points out that a pointer to the original ones may be stored in ->d_fsdata. Follow this line of reasoning: - Add SRCU as a reverse dependency of DEBUG_FS. - Introduce a srcu_struct object for the debugfs subsystem. - In debugfs_create_file(), store a pointer to the original file_operations object in ->d_fsdata. - Make debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace period after the dentry has been delete()'d and before they return to their callers. - Introduce an intermediate file_operations object named "debugfs_open_proxy_file_operations". It's ->open() functions checks, under the protection of a SRCU read lock, whether the dentry is still alive, i.e. has not been d_delete()'d and if so, tries to acquire a reference on the owning module. On success, it sets the file object's ->f_op to the original file_operations and forwards the ongoing open() call to the original ->open(). - For clarity, rename the former debugfs_file_operations to debugfs_noop_file_operations -- they are in no way canonical. The choice of SRCU over "normal" RCU is justified by the fact, that the former may also be used to protect ->i_private data from going away during the execution of a file's readers and writers which may (and do) sleep. Finally, introduce the fs/debugfs/internal.h header containing some declarations internal to the debugfs implementation. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1e9a607534ca..ddb0e8337aae 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -257,6 +257,7 @@ config PAGE_OWNER config DEBUG_FS bool "Debug Filesystem" + select SRCU help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and -- cgit v1.2.3 From 9b1d6c8950021ab007608d455fc9c398ecd25476 Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Mon, 4 Apr 2016 14:48:11 -0700 Subject: lib: scatterlist: move SG pool code from SCSI driver to lib/sg_pool.c Now it's ready to move the mempool based SG chained allocator code from SCSI driver to lib/sg_pool.c, which will be compiled only based on a Kconfig symbol CONFIG_SG_POOL. SCSI selects CONFIG_SG_POOL. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lin Reviewed-by: Sagi Grimberg Signed-off-by: Martin K. Petersen --- lib/Kconfig | 7 +++ lib/Makefile | 1 + lib/sg_pool.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+) create mode 100644 lib/sg_pool.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 3cca1222578e..61d55bd0ed89 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -523,6 +523,13 @@ config SG_SPLIT a scatterlist. This should be selected by a driver or an API which whishes to split a scatterlist amongst multiple DMA channels. +config SG_POOL + def_bool n + help + Provides a helper to allocate chained scatterlists. This should be + selected by a driver or an API which whishes to allocate chained + scatterlist. + # # sg chaining option # diff --git a/lib/Makefile b/lib/Makefile index 7bd6fd436c97..bf01c2673423 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -178,6 +178,7 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o +obj-$(CONFIG_SG_POOL) += sg_pool.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o diff --git a/lib/sg_pool.c b/lib/sg_pool.c new file mode 100644 index 000000000000..6dd30615a201 --- /dev/null +++ b/lib/sg_pool.c @@ -0,0 +1,172 @@ +#include +#include +#include +#include + +#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools) +#define SG_MEMPOOL_SIZE 2 + +struct sg_pool { + size_t size; + char *name; + struct kmem_cache *slab; + mempool_t *pool; +}; + +#define SP(x) { .size = x, "sgpool-" __stringify(x) } +#if (SG_CHUNK_SIZE < 32) +#error SG_CHUNK_SIZE is too small (must be 32 or greater) +#endif +static struct sg_pool sg_pools[] = { + SP(8), + SP(16), +#if (SG_CHUNK_SIZE > 32) + SP(32), +#if (SG_CHUNK_SIZE > 64) + SP(64), +#if (SG_CHUNK_SIZE > 128) + SP(128), +#if (SG_CHUNK_SIZE > 256) +#error SG_CHUNK_SIZE is too large (256 MAX) +#endif +#endif +#endif +#endif + SP(SG_CHUNK_SIZE) +}; +#undef SP + +static inline unsigned int sg_pool_index(unsigned short nents) +{ + unsigned int index; + + BUG_ON(nents > SG_CHUNK_SIZE); + + if (nents <= 8) + index = 0; + else + index = get_count_order(nents) - 3; + + return index; +} + +static void sg_pool_free(struct scatterlist *sgl, unsigned int nents) +{ + struct sg_pool *sgp; + + sgp = sg_pools + sg_pool_index(nents); + mempool_free(sgl, sgp->pool); +} + +static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask) +{ + struct sg_pool *sgp; + + sgp = sg_pools + sg_pool_index(nents); + return mempool_alloc(sgp->pool, gfp_mask); +} + +/** + * sg_free_table_chained - Free a previously mapped sg table + * @table: The sg table header to use + * @first_chunk: was first_chunk not NULL in sg_alloc_table_chained? + * + * Description: + * Free an sg table previously allocated and setup with + * sg_alloc_table_chained(). + * + **/ +void sg_free_table_chained(struct sg_table *table, bool first_chunk) +{ + if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE) + return; + __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free); +} +EXPORT_SYMBOL_GPL(sg_free_table_chained); + +/** + * sg_alloc_table_chained - Allocate and chain SGLs in an sg table + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @first_chunk: first SGL + * + * Description: + * Allocate and chain SGLs in an sg table. If @nents@ is larger than + * SG_CHUNK_SIZE a chained sg table will be setup. + * + **/ +int sg_alloc_table_chained(struct sg_table *table, int nents, + struct scatterlist *first_chunk) +{ + int ret; + + BUG_ON(!nents); + + if (first_chunk) { + if (nents <= SG_CHUNK_SIZE) { + table->nents = table->orig_nents = nents; + sg_init_table(table->sgl, nents); + return 0; + } + } + + ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE, + first_chunk, GFP_ATOMIC, sg_pool_alloc); + if (unlikely(ret)) + sg_free_table_chained(table, (bool)first_chunk); + return ret; +} +EXPORT_SYMBOL_GPL(sg_alloc_table_chained); + +static __init int sg_pool_init(void) +{ + int i; + + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + int size = sgp->size * sizeof(struct scatterlist); + + sgp->slab = kmem_cache_create(sgp->name, size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!sgp->slab) { + printk(KERN_ERR "SG_POOL: can't init sg slab %s\n", + sgp->name); + goto cleanup_sdb; + } + + sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, + sgp->slab); + if (!sgp->pool) { + printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n", + sgp->name); + goto cleanup_sdb; + } + } + + return 0; + +cleanup_sdb: + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + if (sgp->pool) + mempool_destroy(sgp->pool); + if (sgp->slab) + kmem_cache_destroy(sgp->slab); + } + + return -ENOMEM; +} + +static __exit void sg_pool_exit(void) +{ + int i; + + for (i = 0; i < SG_MEMPOOL_NR; i++) { + struct sg_pool *sgp = sg_pools + i; + mempool_destroy(sgp->pool); + kmem_cache_destroy(sgp->slab); + } +} + +module_init(sg_pool_init); +module_exit(sg_pool_exit); -- cgit v1.2.3 From b53f27e4fa0d0e72d897830cc4f3f83d2a25d952 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:23 -0700 Subject: string_helpers: add kstrdup_quotable Handle allocating and escaping a string safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- lib/string_helpers.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 5c88204b6f1f..aa00c9f989ee 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -534,3 +535,30 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, return p - dst; } EXPORT_SYMBOL(string_escape_mem); + +/* + * Return an allocated string that has been escaped of special characters + * and double quotes, making it safe to log in quotes. + */ +char *kstrdup_quotable(const char *src, gfp_t gfp) +{ + size_t slen, dlen; + char *dst; + const int flags = ESCAPE_HEX; + const char esc[] = "\f\n\r\t\v\a\e\\\""; + + if (!src) + return NULL; + slen = strlen(src); + + dlen = string_escape_mem(src, slen, NULL, 0, flags, esc); + dst = kmalloc(dlen + 1, gfp); + if (!dst) + return NULL; + + WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen); + dst[dlen] = '\0'; + + return dst; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable); -- cgit v1.2.3 From 0d0443288f2244d7054796086e481ddef6abdbba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:24 -0700 Subject: string_helpers: add kstrdup_quotable_cmdline Provide an escaped (but readable: no inter-argument NULLs) commandline safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- lib/string_helpers.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'lib') diff --git a/lib/string_helpers.c b/lib/string_helpers.c index aa00c9f989ee..b16ee85aaf87 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -562,3 +563,36 @@ char *kstrdup_quotable(const char *src, gfp_t gfp) return dst; } EXPORT_SYMBOL_GPL(kstrdup_quotable); + +/* + * Returns allocated NULL-terminated string containing process + * command line, with inter-argument NULLs replaced with spaces, + * and other special characters escaped. + */ +char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) +{ + char *buffer, *quoted; + int i, res; + + buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY); + if (!buffer) + return NULL; + + res = get_cmdline(task, buffer, PAGE_SIZE - 1); + buffer[res] = '\0'; + + /* Collapse trailing NULLs, leave res pointing to last non-NULL. */ + while (--res >= 0 && buffer[res] == '\0') + ; + + /* Replace inter-argument NULLs. */ + for (i = 0; i <= res; i++) + if (buffer[i] == '\0') + buffer[i] = ' '; + + /* Make sure result is printable. */ + quoted = kstrdup_quotable(buffer, gfp); + kfree(buffer); + return quoted; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline); -- cgit v1.2.3 From 21985319add60b55fc27230d9421a3e5af7e998a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:25 -0700 Subject: string_helpers: add kstrdup_quotable_file Allocate a NULL-terminated file path with special characters escaped, safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- lib/string_helpers.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/string_helpers.c b/lib/string_helpers.c index b16ee85aaf87..ecaac2c0526f 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -596,3 +598,31 @@ char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) return quoted; } EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline); + +/* + * Returns allocated NULL-terminated string containing pathname, + * with special characters escaped, able to be safely logged. If + * there is an error, the leading character will be "<". + */ +char *kstrdup_quotable_file(struct file *file, gfp_t gfp) +{ + char *temp, *pathname; + + if (!file) + return kstrdup("", gfp); + + /* We add 11 spaces for ' (deleted)' to be appended */ + temp = kmalloc(PATH_MAX + 11, GFP_TEMPORARY); + if (!temp) + return kstrdup("", gfp); + + pathname = file_path(file, temp, PATH_MAX + 11); + if (IS_ERR(pathname)) + pathname = kstrdup("", gfp); + else + pathname = kstrdup_quotable(pathname, gfp); + + kfree(temp); + return pathname; +} +EXPORT_SYMBOL_GPL(kstrdup_quotable_file); -- cgit v1.2.3 From dd254f5a382cc7879db7a07ed266b12d38fe3ab6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 May 2016 11:54:48 -0400 Subject: fold checks into iterate_and_advance() they are open-coded in all users except iov_iter_advance(), and there they wouldn't be a bad idea either - as it is, iov_iter_advance(i, 0) ends up dereferencing potentially past the end of iovec array. It doesn't do anything with the value it reads, and very unlikely to trigger an oops on dereference, but it is not impossible. Reported-by: Jiri Slaby Reported-by: Takashi Iwai Signed-off-by: Al Viro --- lib/iov_iter.c | 104 ++++++++++++++++++++------------------------------------- 1 file changed, 36 insertions(+), 68 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 5fecddc32b1b..015061e49236 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -99,40 +99,44 @@ } #define iterate_and_advance(i, n, v, I, B, K) { \ - size_t skip = i->iov_offset; \ - if (unlikely(i->type & ITER_BVEC)) { \ - const struct bio_vec *bvec; \ - struct bio_vec v; \ - iterate_bvec(i, n, v, bvec, skip, (B)) \ - if (skip == bvec->bv_len) { \ - bvec++; \ - skip = 0; \ - } \ - i->nr_segs -= bvec - i->bvec; \ - i->bvec = bvec; \ - } else if (unlikely(i->type & ITER_KVEC)) { \ - const struct kvec *kvec; \ - struct kvec v; \ - iterate_kvec(i, n, v, kvec, skip, (K)) \ - if (skip == kvec->iov_len) { \ - kvec++; \ - skip = 0; \ - } \ - i->nr_segs -= kvec - i->kvec; \ - i->kvec = kvec; \ - } else { \ - const struct iovec *iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ - if (skip == iov->iov_len) { \ - iov++; \ - skip = 0; \ + if (unlikely(i->count < n)) \ + n = i->count; \ + if (n) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + if (skip == bvec->bv_len) { \ + bvec++; \ + skip = 0; \ + } \ + i->nr_segs -= bvec - i->bvec; \ + i->bvec = bvec; \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + if (skip == kvec->iov_len) { \ + kvec++; \ + skip = 0; \ + } \ + i->nr_segs -= kvec - i->kvec; \ + i->kvec = kvec; \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + if (skip == iov->iov_len) { \ + iov++; \ + skip = 0; \ + } \ + i->nr_segs -= iov - i->iov; \ + i->iov = iov; \ } \ - i->nr_segs -= iov - i->iov; \ - i->iov = iov; \ + i->count -= n; \ + i->iov_offset = skip; \ } \ - i->count -= n; \ - i->iov_offset = skip; \ } static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, @@ -386,12 +390,6 @@ static void memzero_page(struct page *page, size_t offset, size_t len) size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - iterate_and_advance(i, bytes, v, __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), @@ -407,12 +405,6 @@ EXPORT_SYMBOL(copy_to_iter); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - iterate_and_advance(i, bytes, v, __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), @@ -428,12 +420,6 @@ EXPORT_SYMBOL(copy_from_iter); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - iterate_and_advance(i, bytes, v, __copy_from_user_nocache((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), @@ -474,12 +460,6 @@ EXPORT_SYMBOL(copy_page_from_iter); size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - iterate_and_advance(i, bytes, v, __clear_user(v.iov_base, v.iov_len), memzero_page(v.bv_page, v.bv_offset, v.bv_len), @@ -666,12 +646,6 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, char *to = addr; __wsum sum, next; size_t off = 0; - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - sum = *csum; iterate_and_advance(i, bytes, v, ({ int err = 0; @@ -710,12 +684,6 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, const char *from = addr; __wsum sum, next; size_t off = 0; - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - sum = *csum; iterate_and_advance(i, bytes, v, ({ int err = 0; -- cgit v1.2.3 From b1e4d9d82df8ab9097f80aa208c40eab6fc29858 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:20 -0700 Subject: debugobjects: make fixup functions return bool instead of int I am going to introduce debugobjects infrastructure to USB subsystem. But before this, I found the code of debugobjects could be improved. This patchset will make fixup functions return bool type instead of int. Because fixup only need report success or no. boolean is the 'real' type. This patch (of 7): The object debugging infrastructure core provides some fixup callbacks for the subsystem who use it. These callbacks are called from the debug code whenever a problem in debug_object_init is detected. And debugobjects core suppose them returns 1 when the fixup was successful, otherwise 0. So the return type is boolean. A bad thing is that debug_object_fixup use the return value for arithmetic operation. It confused me that what is the reall return type. Reading over the whole code, I found some place do use the return value incorrectly(see next patch). So why use bool type instead? Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/debugobjects.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 519b5a10fd70..a9cee165cf25 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -269,16 +269,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg) * Try to repair the damage, so we have a better chance to get useful * debug output. */ -static int -debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state), +static bool +debug_object_fixup(bool (*fixup)(void *addr, enum debug_obj_state state), void * addr, enum debug_obj_state state) { - int fixed = 0; - - if (fixup) - fixed = fixup(addr, state); - debug_objects_fixups += fixed; - return fixed; + if (fixup && fixup(addr, state)) { + debug_objects_fixups++; + return true; + } + return false; } static void debug_object_is_on_stack(void *addr, int onstack) @@ -797,7 +796,7 @@ static __initdata struct debug_obj_descr descr_type_test; * fixup_init is called when: * - an active object is initialized */ -static int __init fixup_init(void *addr, enum debug_obj_state state) +static bool __init fixup_init(void *addr, enum debug_obj_state state) { struct self_test *obj = addr; @@ -805,9 +804,9 @@ static int __init fixup_init(void *addr, enum debug_obj_state state) case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); debug_object_init(obj, &descr_type_test); - return 1; + return true; default: - return 0; + return false; } } @@ -816,7 +815,7 @@ static int __init fixup_init(void *addr, enum debug_obj_state state) * - an active object is activated * - an unknown object is activated (might be a statically initialized object) */ -static int __init fixup_activate(void *addr, enum debug_obj_state state) +static bool __init fixup_activate(void *addr, enum debug_obj_state state) { struct self_test *obj = addr; @@ -825,17 +824,17 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state) if (obj->static_init == 1) { debug_object_init(obj, &descr_type_test); debug_object_activate(obj, &descr_type_test); - return 0; + return false; } - return 1; + return true; case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); debug_object_activate(obj, &descr_type_test); - return 1; + return true; default: - return 0; + return false; } } @@ -843,7 +842,7 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state) * fixup_destroy is called when: * - an active object is destroyed */ -static int __init fixup_destroy(void *addr, enum debug_obj_state state) +static bool __init fixup_destroy(void *addr, enum debug_obj_state state) { struct self_test *obj = addr; @@ -851,9 +850,9 @@ static int __init fixup_destroy(void *addr, enum debug_obj_state state) case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); debug_object_destroy(obj, &descr_type_test); - return 1; + return true; default: - return 0; + return false; } } @@ -861,7 +860,7 @@ static int __init fixup_destroy(void *addr, enum debug_obj_state state) * fixup_free is called when: * - an active object is freed */ -static int __init fixup_free(void *addr, enum debug_obj_state state) +static bool __init fixup_free(void *addr, enum debug_obj_state state) { struct self_test *obj = addr; @@ -869,9 +868,9 @@ static int __init fixup_free(void *addr, enum debug_obj_state state) case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); debug_object_free(obj, &descr_type_test); - return 1; + return true; default: - return 0; + return false; } } -- cgit v1.2.3 From e7a8e78bd4ad931660743bd2dbabd9170a715294 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:23 -0700 Subject: debugobjects: correct the usage of fixup call results If debug_object_fixup() return non-zero when problem has been fixed. But the code got it backwards, it taks 0 as fixup successfully. So fix it. Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a9cee165cf25..2f07c8c697b8 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -415,7 +415,7 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr) state = obj->state; raw_spin_unlock_irqrestore(&db->lock, flags); ret = debug_object_fixup(descr->fixup_activate, addr, state); - return ret ? -EINVAL : 0; + return ret ? 0 : -EINVAL; case ODEBUG_STATE_DESTROYED: debug_print_object(obj, "activate"); -- cgit v1.2.3 From d99b1d8912654c4bdeb51063d2e934afc2372cc2 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:35 -0700 Subject: percpu_counter: update debugobjects fixup callbacks return type Update the return type to use bool instead of int, corresponding to cheange (debugobjects: make fixup functions return bool instead of int). Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/percpu_counter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index f051d69f0910..72d36113ccaa 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -19,7 +19,7 @@ static DEFINE_SPINLOCK(percpu_counters_lock); static struct debug_obj_descr percpu_counter_debug_descr; -static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state) +static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) { struct percpu_counter *fbc = addr; @@ -27,9 +27,9 @@ static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state) case ODEBUG_STATE_ACTIVE: percpu_counter_destroy(fbc); debug_object_free(fbc, &percpu_counter_debug_descr); - return 1; + return true; default: - return 0; + return false; } } -- cgit v1.2.3 From b9fdac7f660609abb157500e468d2165b3c9cf08 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:41 -0700 Subject: debugobjects: insulate non-fixup logic related to static obj from fixup callbacks When activating a static object we need make sure that the object is tracked in the object tracker. If it is a non-static object then the activation is illegal. In previous implementation, each subsystem need take care of this in their fixup callbacks. Actually we can put it into debugobjects core. Thus we can save duplicated code, and have *pure* fixup callbacks. To achieve this, a new callback "is_static_object" is introduced to let the type specific code decide whether a object is static or not. If yes, we take it into object tracker, otherwise give warning and invoke fixup callback. This change has paassed debugobjects selftest, and I also do some test with all debugobjects supports enabled. At last, I have a concern about the fixups that can it change the object which is in incorrect state on fixup? Because the 'addr' may not point to any valid object if a non-static object is not tracked. Then Change such object can overwrite someone's memory and cause unexpected behaviour. For example, the timer_fixup_activate bind timer to function stub_timer. Link: http://lkml.kernel.org/r/1462576157-14539-1-git-send-email-changbin.du@intel.com [changbin.du@intel.com: improve code comments where invoke the new is_static_object callback] Link: http://lkml.kernel.org/r/1462777431-8171-1-git-send-email-changbin.du@intel.com Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/debugobjects.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2f07c8c697b8..a8e12601eb37 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -431,14 +431,21 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr) raw_spin_unlock_irqrestore(&db->lock, flags); /* - * This happens when a static object is activated. We - * let the type specific code decide whether this is - * true or not. + * We are here when a static object is activated. We + * let the type specific code confirm whether this is + * true or not. if true, we just make sure that the + * static object is tracked in the object tracker. If + * not, this must be a bug, so we try to fix it up. */ - if (debug_object_fixup(descr->fixup_activate, addr, - ODEBUG_STATE_NOTAVAILABLE)) { + if (descr->is_static_object && descr->is_static_object(addr)) { + /* track this static object */ + debug_object_init(addr, descr); + debug_object_activate(addr, descr); + } else { debug_print_object(&o, "activate"); - return -EINVAL; + ret = debug_object_fixup(descr->fixup_activate, addr, + ODEBUG_STATE_NOTAVAILABLE); + return ret ? 0 : -EINVAL; } return 0; } @@ -602,12 +609,18 @@ void debug_object_assert_init(void *addr, struct debug_obj_descr *descr) raw_spin_unlock_irqrestore(&db->lock, flags); /* - * Maybe the object is static. Let the type specific - * code decide what to do. + * Maybe the object is static, and we let the type specific + * code confirm. Track this static object if true, else invoke + * fixup. */ - if (debug_object_fixup(descr->fixup_assert_init, addr, - ODEBUG_STATE_NOTAVAILABLE)) + if (descr->is_static_object && descr->is_static_object(addr)) { + /* Track this static object */ + debug_object_init(addr, descr); + } else { debug_print_object(&o, "assert_init"); + debug_object_fixup(descr->fixup_assert_init, addr, + ODEBUG_STATE_NOTAVAILABLE); + } return; } @@ -792,6 +805,13 @@ struct self_test { static __initdata struct debug_obj_descr descr_type_test; +static bool __init is_static_object(void *addr) +{ + struct self_test *obj = addr; + + return obj->static_init; +} + /* * fixup_init is called when: * - an active object is initialized @@ -813,7 +833,7 @@ static bool __init fixup_init(void *addr, enum debug_obj_state state) /* * fixup_activate is called when: * - an active object is activated - * - an unknown object is activated (might be a statically initialized object) + * - an unknown non-static object is activated */ static bool __init fixup_activate(void *addr, enum debug_obj_state state) { @@ -821,13 +841,7 @@ static bool __init fixup_activate(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_NOTAVAILABLE: - if (obj->static_init == 1) { - debug_object_init(obj, &descr_type_test); - debug_object_activate(obj, &descr_type_test); - return false; - } return true; - case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); debug_object_activate(obj, &descr_type_test); @@ -916,6 +930,7 @@ out: static __initdata struct debug_obj_descr descr_type_test = { .name = "selftest", + .is_static_object = is_static_object, .fixup_init = fixup_init, .fixup_activate = fixup_activate, .fixup_destroy = fixup_destroy, -- cgit v1.2.3 From 0edaf86cf1a6a97d811fc34765ddbcbc310de564 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 May 2016 17:10:58 -0700 Subject: include/linux/nodemask.h: create next_node_in() helper Lots of code does node = next_node(node, XXX); if (node == MAX_NUMNODES) node = first_node(XXX); so create next_node_in() to do this and use it in various places. [mhocko@suse.com: use next_node_in() helper] Acked-by: Vlastimil Babka Acked-by: Michal Hocko Signed-off-by: Michal Hocko Cc: Xishi Qiu Cc: Joonsoo Kim Cc: David Rientjes Cc: Naoya Horiguchi Cc: Laura Abbott Cc: Hui Zhu Cc: Wang Xiaoqiang Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 2 +- lib/nodemask.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 lib/nodemask.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 931396ada5eb..42b69185f963 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,7 +25,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o + earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o diff --git a/lib/nodemask.c b/lib/nodemask.c new file mode 100644 index 000000000000..e42a5bf44d33 --- /dev/null +++ b/lib/nodemask.c @@ -0,0 +1,30 @@ +#include +#include +#include + +int __next_node_in(int node, const nodemask_t *srcp) +{ + int ret = __next_node(node, srcp); + + if (ret == MAX_NUMNODES) + ret = __first_node(srcp); + return ret; +} +EXPORT_SYMBOL(__next_node_in); + +#ifdef CONFIG_NUMA +/* + * Return the bit number of a random bit set in the nodemask. + * (returns NUMA_NO_NODE if nodemask is empty) + */ +int node_random(const nodemask_t *maskp) +{ + int w, bit = NUMA_NO_NODE; + + w = nodes_weight(*maskp); + if (w) + bit = bitmap_ord_to_pos(maskp->bits, + get_random_int() % w, MAX_NUMNODES); + return bit; +} +#endif -- cgit v1.2.3 From 96fe805fb6fe9b2ed12fc54ad0e3e6829a4152cb Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 20 May 2016 16:59:17 -0700 Subject: mm, kasan: add a ksize() test Add a test that makes sure ksize() unpoisons the whole chunk. Signed-off-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Konstantin Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 82169fbf2453..48e5a0be655c 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -344,6 +344,25 @@ static noinline void __init kasan_stack_oob(void) *(volatile char *)p; } +static noinline void __init ksize_unpoisons_memory(void) +{ + char *ptr; + size_t size = 123, real_size = size; + + pr_info("ksize() unpoisons the whole allocated chunk\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + real_size = ksize(ptr); + /* This access doesn't trigger an error. */ + ptr[size] = 'x'; + /* This one does. */ + ptr[real_size] = 'y'; + kfree(ptr); +} + static int __init kmalloc_tests_init(void) { kmalloc_oob_right(); @@ -367,6 +386,7 @@ static int __init kmalloc_tests_init(void) kmem_cache_oob(); kasan_stack_oob(); kasan_global_oob(); + ksize_unpoisons_memory(); return -EAGAIN; } -- cgit v1.2.3 From 1771c6e1a567ea0ba2cccc0a4ffe68a1419fd8ef Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:31 -0700 Subject: x86/kasan: instrument user memory access API Exchange between user and kernel memory is coded in assembly language. Which means that such accesses won't be spotted by KASAN as a compiler instruments only C code. Add explicit KASAN checks to user memory access API to ensure that userspace writes to (or reads from) a valid kernel memory. Note: Unlike others strncpy_from_user() is written mostly in C and KASAN sees memory accesses in it. However, it makes sense to add explicit check for all @count bytes that *potentially* could be written to the kernel. [aryabinin@virtuozzo.com: move kasan check under the condition] Link: http://lkml.kernel.org/r/1462869209-21096-1-git-send-email-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/1462538722-1574-4-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/strncpy_from_user.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 33840324138c..33f655ef48cd 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -109,6 +110,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max = max_addr - src_addr; long retval; + kasan_check_write(dst, count); user_access_begin(); retval = do_strncpy_from_user(dst, src, count, max); user_access_end(); -- cgit v1.2.3 From eae08dcab80c695c16c9f1f7dcd5b8ed52bfc88b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:34 -0700 Subject: kasan/tests: add tests for user memory access functions Add some tests for the newly-added user memory access API. Link: http://lkml.kernel.org/r/1462538722-1574-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 48e5a0be655c..5e51872b3fc1 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -12,9 +12,12 @@ #define pr_fmt(fmt) "kasan test: %s " fmt, __func__ #include +#include +#include #include #include #include +#include #include static noinline void __init kmalloc_oob_right(void) @@ -363,6 +366,51 @@ static noinline void __init ksize_unpoisons_memory(void) kfree(ptr); } +static noinline void __init copy_user_test(void) +{ + char *kmem; + char __user *usermem; + size_t size = 10; + int unused; + + kmem = kmalloc(size, GFP_KERNEL); + if (!kmem) + return; + + usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (IS_ERR(usermem)) { + pr_err("Failed to allocate user memory\n"); + kfree(kmem); + return; + } + + pr_info("out-of-bounds in copy_from_user()\n"); + unused = copy_from_user(kmem, usermem, size + 1); + + pr_info("out-of-bounds in copy_to_user()\n"); + unused = copy_to_user(usermem, kmem, size + 1); + + pr_info("out-of-bounds in __copy_from_user()\n"); + unused = __copy_from_user(kmem, usermem, size + 1); + + pr_info("out-of-bounds in __copy_to_user()\n"); + unused = __copy_to_user(usermem, kmem, size + 1); + + pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); + unused = __copy_from_user_inatomic(kmem, usermem, size + 1); + + pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); + unused = __copy_to_user_inatomic(usermem, kmem, size + 1); + + pr_info("out-of-bounds in strncpy_from_user()\n"); + unused = strncpy_from_user(kmem, usermem, size + 1); + + vm_munmap((unsigned long)usermem, PAGE_SIZE); + kfree(kmem); +} + static int __init kmalloc_tests_init(void) { kmalloc_oob_right(); @@ -387,6 +435,7 @@ static int __init kmalloc_tests_init(void) kasan_stack_oob(); kasan_global_oob(); ksize_unpoisons_memory(); + copy_user_test(); return -EAGAIN; } -- cgit v1.2.3 From 42a0bb3f71383b457a7db362f1c69e7afb96732b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 20 May 2016 17:00:33 -0700 Subject: printk/nmi: generic solution for safe printk in NMI printk() takes some locks and could not be used a safe way in NMI context. The chance of a deadlock is real especially when printing stacks from all CPUs. This particular problem has been addressed on x86 by the commit a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs"). The patchset brings two big advantages. First, it makes the NMI backtraces safe on all architectures for free. Second, it makes all NMI messages almost safe on all architectures (the temporary buffer is limited. We still should keep the number of messages in NMI context at minimum). Note that there already are several messages printed in NMI context: WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE handlers. These are not easy to avoid. This patch reuses most of the code and makes it generic. It is useful for all messages and architectures that support NMI. The alternative printk_func is set when entering and is reseted when leaving NMI context. It queues IRQ work to copy the messages into the main ring buffer in a safe context. __printk_nmi_flush() copies all available messages and reset the buffer. Then we could use a simple cmpxchg operations to get synchronized with writers. There is also used a spinlock to get synchronized with other flushers. We do not longer use seq_buf because it depends on external lock. It would be hard to make all supported operations safe for a lockless use. It would be confusing and error prone to make only some operations safe. The code is put into separate printk/nmi.c as suggested by Steven Rostedt. It needs a per-CPU buffer and is compiled only on architectures that call nmi_enter(). This is achieved by the new HAVE_NMI Kconfig flag. The are MN10300 and Xtensa architectures. We need to clean up NMI handling there first. Let's do it separately. The patch is heavily based on the draft from Peter Zijlstra, see https://lkml.org/lkml/2015/6/10/327 [arnd@arndb.de: printk-nmi: use %zu format string for size_t] [akpm@linux-foundation.org: min_t->min - all types are size_t here] Signed-off-by: Petr Mladek Suggested-by: Peter Zijlstra Suggested-by: Steven Rostedt Cc: Jan Kara Acked-by: Russell King [arm part] Cc: Daniel Thompson Cc: Jiri Kosina Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: David Miller Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/nmi_backtrace.c | 89 +++-------------------------------------------------- 1 file changed, 5 insertions(+), 84 deletions(-) (limited to 'lib') diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 6019c53c669e..26caf51cc238 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -16,33 +16,14 @@ #include #include #include -#include #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) -{ - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); -} - /* * When raise() is called it will be is passed a pointer to the * backtrace_mask. Architectures that call nmi_cpu_backtrace() @@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end) void nmi_trigger_all_cpu_backtrace(bool include_self, void (*raise)(cpumask_t *mask)) { - struct nmi_seq_buf *s; - int i, cpu, this_cpu = get_cpu(); + int i, this_cpu = get_cpu(); if (test_and_set_bit(0, &backtrace_flag)) { /* @@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, if (!include_self) cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("Sending NMI to %s CPUs:\n", (include_self ? "all" : "other")); @@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, } /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. + * Force flush any remote buffers that might be stuck in IRQ context + * and therefore could not run their irq_work. */ - for_each_cpu(cpu, &printtrace_mask) { - int len, last_i = 0; + printk_nmi_flush(); - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_atomic(); + clear_bit_unlock(0, &backtrace_flag); put_cpu(); } -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; -} - bool nmi_cpu_backtrace(struct pt_regs *regs) { int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); pr_warn("NMI backtrace for cpu %d\n", cpu); if (regs) show_regs(regs); else dump_stack(); - this_cpu_write(printk_func, printk_func_save); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- cgit v1.2.3 From aa4ea1c3b3948d325a6826adf9c367d11fa1ab74 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:00:54 -0700 Subject: lib/vsprintf: simplify UUID printing There are few functions here and there along with type definitions that provide UUID API. This series consolidates everything under one hood and converts current users. This has been tested for a while internally, however it doesn't mean we covered all possible cases (especially accuracy of UUID constants after conversion). So, please test this as much as you can and provide your tag. We appreciate the effort. The ACPI conversion is postponed for now to sort more generic things out first. This patch (of 9): Since we have hex_byte_pack_upper() we may use it directly and avoid second loop. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ccb664b54280..be0e7cf11e48 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1324,7 +1324,10 @@ char *uuid_string(char *buf, char *end, const u8 *addr, } for (i = 0; i < 16; i++) { - p = hex_byte_pack(p, addr[index[i]]); + if (uc) + p = hex_byte_pack_upper(p, addr[index[i]]); + else + p = hex_byte_pack(p, addr[index[i]]); switch (i) { case 3: case 5: @@ -1337,13 +1340,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr, *p = 0; - if (uc) { - p = uuid; - do { - *p = toupper(*p); - } while (*(++p)); - } - return string(buf, end, uuid, spec); } -- cgit v1.2.3 From 8da4b8c48e7b43cb16d05e1dbb34ad9f73ab7efd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:00 -0700 Subject: lib/uuid.c: move generate_random_uuid() to uuid.c Let's gather the UUID related functions under one hood. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/uuid.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib') diff --git a/lib/uuid.c b/lib/uuid.c index 398821e4dce1..6c81c0b0467e 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -23,6 +23,26 @@ #include #include +/*************************************************************** + * Random UUID interface + * + * Used here for a Boot ID, but can be useful for other kernel + * drivers. + ***************************************************************/ + +/* + * Generate random UUID + */ +void generate_random_uuid(unsigned char uuid[16]) +{ + get_random_bytes(uuid, 16); + /* Set UUID version to 4 --- truly random generation */ + uuid[6] = (uuid[6] & 0x0F) | 0x40; + /* Set the UUID variant to DCE */ + uuid[8] = (uuid[8] & 0x3F) | 0x80; +} +EXPORT_SYMBOL(generate_random_uuid); + static void __uuid_gen_common(__u8 b[16]) { prandom_bytes(b, 16); -- cgit v1.2.3 From 2b1b0d66704a8cafe83be7114ec4c15ab3a314ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:04 -0700 Subject: lib/uuid.c: introduce a few more generic helpers There are new helpers in this patch: uuid_is_valid checks if a UUID is valid uuid_be_to_bin converts from string to binary (big endian) uuid_le_to_bin converts from string to binary (little endian) They will be used in future, i.e. in the following patches in the series. This also moves the indices arrays to lib/uuid.c to be shared accross modules. [andriy.shevchenko@linux.intel.com: fix typo] Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/uuid.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/vsprintf.c | 9 ++++---- 2 files changed, 69 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/uuid.c b/lib/uuid.c index 6c81c0b0467e..82787f652fbc 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -19,10 +19,17 @@ */ #include +#include +#include #include #include #include +const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(uuid_le_index); +const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(uuid_be_index); + /*************************************************************** * Random UUID interface * @@ -65,3 +72,61 @@ void uuid_be_gen(uuid_be *bu) bu->b[6] = (bu->b[6] & 0x0F) | 0x40; } EXPORT_SYMBOL_GPL(uuid_be_gen); + +/** + * uuid_is_valid - checks if UUID string valid + * @uuid: UUID string to check + * + * Description: + * It checks if the UUID string is following the format: + * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + * where x is a hex digit. + * + * Return: true if input is valid UUID string. + */ +bool uuid_is_valid(const char *uuid) +{ + unsigned int i; + + for (i = 0; i < UUID_STRING_LEN; i++) { + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (uuid[i] != '-') + return false; + } else if (!isxdigit(uuid[i])) { + return false; + } + } + + return true; +} +EXPORT_SYMBOL(uuid_is_valid); + +static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16]) +{ + static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34}; + unsigned int i; + + if (!uuid_is_valid(uuid)) + return -EINVAL; + + for (i = 0; i < 16; i++) { + int hi = hex_to_bin(uuid[si[i]] + 0); + int lo = hex_to_bin(uuid[si[i]] + 1); + + b[ei[i]] = (hi << 4) | lo; + } + + return 0; +} + +int uuid_le_to_bin(const char *uuid, uuid_le *u) +{ + return __uuid_to_bin(uuid, u->b, uuid_le_index); +} +EXPORT_SYMBOL(uuid_le_to_bin); + +int uuid_be_to_bin(const char *uuid, uuid_be *u) +{ + return __uuid_to_bin(uuid, u->b, uuid_be_index); +} +EXPORT_SYMBOL(uuid_be_to_bin); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index be0e7cf11e48..0967771d8f7f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #ifdef CONFIG_BLOCK #include @@ -1304,19 +1305,17 @@ static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) { - char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char uuid[UUID_STRING_LEN + 1]; char *p = uuid; int i; - static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; - const u8 *index = be; + const u8 *index = uuid_be_index; bool uc = false; switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ case 'l': - index = le; + index = uuid_le_index; break; case 'B': uc = true; -- cgit v1.2.3 From e3a93bce69ad3e2c38927abe311b8cb4f17abbaf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:07 -0700 Subject: lib/uuid.c: remove FSF address There is no point in keeping an address in the file since it's subject to change. While here, update Intel Copyright years. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/uuid.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/uuid.c b/lib/uuid.c index 82787f652fbc..e116ae5fa00f 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -1,7 +1,7 @@ /* * Unified UUID/GUID definition * - * Copyright (C) 2009, Intel Corp. + * Copyright (C) 2009, 2016 Intel Corp. * Huang Ying * * This program is free software; you can redistribute it and/or @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -- cgit v1.2.3 From 57578c2ea2cb2e0d362a9212ac83cf90221d4883 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:54 -0700 Subject: raxix-tree: introduce CONFIG_RADIX_TREE_MULTIORDER I've been receiving increasingly concerned notes from 0day about how much my recent changes have been bloating the radix tree. Make it happier by only including multiorder support if CONFIG_TRANSPARENT_HUGEPAGES is set. This is an independent Kconfig option, so other radix tree users can also set it if they have a need. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 +++ lib/radix-tree.c | 26 ++++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 61d55bd0ed89..d79909dc01ec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -362,6 +362,9 @@ config INTERVAL_TREE for more information. +config RADIX_TREE_MULTIORDER + bool + config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1624c4117961..799f341977d0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -484,6 +484,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = node->slots[offset]; } +#ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ if ((shift - order) > 0) { int i, n = 1 << (shift - order); @@ -499,6 +500,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, node->count++; } } +#endif if (nodep) *nodep = node; @@ -1469,6 +1471,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, return deleted; } +static inline void delete_sibling_entries(struct radix_tree_node *node, + void *ptr, unsigned offset) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + int i; + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + node->slots[offset + i] = NULL; + node->count--; + } +#endif +} + /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root @@ -1484,7 +1500,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset, i; + unsigned int offset; void **slot; void *entry; int tag; @@ -1513,13 +1529,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } - /* Delete any sibling slots pointing to this slot */ - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr_to_indirect(slot)) - break; - node->slots[offset + i] = NULL; - node->count--; - } + delete_sibling_entries(node, ptr_to_indirect(slot), offset); node->slots[offset] = NULL; node->count--; -- cgit v1.2.3 From db050f2924fcf39428bdadf28970a32cfaf256ef Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:57 -0700 Subject: radix-tree: add missing sibling entry functionality The code I previously added to enable multiorder radix tree entries was untested and therefore buggy. This commit adds the support functions that Ross and I decided were necessary over a four-week period of iterating various designs. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 799f341977d0..585965afc808 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -80,6 +80,46 @@ static inline void *indirect_to_ptr(void *ptr) return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#ifdef CONFIG_RADIX_TREE_MULTIORDER +/* Sibling slots point directly to another slot in the same node */ +static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +{ + void **ptr = node; + return (parent->slots <= ptr) && + (ptr < parent->slots + RADIX_TREE_MAP_SIZE); +} +#else +static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +{ + return false; +} +#endif + +static inline unsigned long get_slot_offset(struct radix_tree_node *parent, + void **slot) +{ + return slot - parent->slots; +} + +static unsigned radix_tree_descend(struct radix_tree_node *parent, + struct radix_tree_node **nodep, unsigned offset) +{ + void **entry = rcu_dereference_raw(parent->slots[offset]); + +#ifdef CONFIG_RADIX_TREE_MULTIORDER + if (radix_tree_is_indirect_p