summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-27 18:09:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-27 18:09:44 -0700
commit7f3d08b255d1806502e45fe70ca2ba9646eb3aa1 (patch)
treeb05f285f31a37dac5910c40a643c7d38b19dfe3c
parent916a75965e5236f9e353416a703a0f4c8de2f56c (diff)
parentc8dbea6df351df211216b5f8877b020f21ecf75f (diff)
downloadlinux-7f3d08b255d1806502e45fe70ca2ba9646eb3aa1.tar.gz
linux-7f3d08b255d1806502e45fe70ca2ba9646eb3aa1.tar.bz2
linux-7f3d08b255d1806502e45fe70ca2ba9646eb3aa1.zip
Merge tag 'printk-for-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux
Pull printk updates from Petr Mladek: - Stop synchronizing kernel log buffer readers by logbuf_lock. As a result, the access to the buffer is fully lockless now. Note that printk() itself still uses locks because it tries to flush the messages to the console immediately. Also the per-CPU temporary buffers are still there because they prevent infinite recursion and serialize backtraces from NMI. All this is going to change in the future. - kmsg_dump API rework and cleanup as a side effect of the logbuf_lock removal. - Make bstr_printf() aware that %pf and %pF formats could deference the given pointer. - Show also page flags by %pGp format. - Clarify the documentation for plain pointer printing. - Do not show no_hash_pointers warning multiple times. - Update Senozhatsky email address. - Some clean up. * tag 'printk-for-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: (24 commits) lib/vsprintf.c: remove leftover 'f' and 'F' cases from bstr_printf() printk: clarify the documentation for plain pointer printing kernel/printk.c: Fixed mundane typos printk: rename vprintk_func to vprintk vsprintf: dump full information of page flags in pGp mm, slub: don't combine pr_err with INFO mm, slub: use pGp to print page flags MAINTAINERS: update Senozhatsky email address lib/vsprintf: do not show no_hash_pointers message multiple times printk: console: remove unnecessary safe buffer usage printk: kmsg_dump: remove _nolock() variants printk: remove logbuf_lock printk: introduce a kmsg_dump iterator printk: kmsg_dumper: remove @active field printk: add syslog_lock printk: use atomic64_t for devkmsg_user.seq printk: use seqcount_latch for clear_seq printk: introduce CONSOLE_LOG_MAX printk: consolidate kmsg_dump_get_buffer/syslog_print_all code printk: refactor kmsg_dump_get_buffer() ...
-rw-r--r--Documentation/core-api/printk-formats.rst28
-rw-r--r--MAINTAINERS8
-rw-r--r--arch/powerpc/kernel/nvram_64.c8
-rw-r--r--arch/powerpc/xmon/xmon.c6
-rw-r--r--arch/um/kernel/kmsg_dump.c13
-rw-r--r--drivers/hv/vmbus_drv.c4
-rw-r--r--drivers/mtd/mtdoops.c17
-rw-r--r--fs/pstore/platform.c5
-rw-r--r--include/linux/kmsg_dump.h47
-rw-r--r--kernel/debug/kdb/kdb_main.c10
-rw-r--r--kernel/printk/internal.h7
-rw-r--r--kernel/printk/printk.c478
-rw-r--r--kernel/printk/printk_safe.c30
-rw-r--r--lib/test_printf.c90
-rw-r--r--lib/vsprintf.c78
-rw-r--r--mm/slub.c13
16 files changed, 502 insertions, 340 deletions
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 160e710d992f..9be6de402cb9 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -79,7 +79,19 @@ Pointers printed without a specifier extension (i.e unadorned %p) are
hashed to prevent leaking information about the kernel memory layout. This
has the added benefit of providing a unique identifier. On 64-bit machines
the first 32 bits are zeroed. The kernel will print ``(ptrval)`` until it
-gathers enough entropy. If you *really* want the address see %px below.
+gathers enough entropy.
+
+When possible, use specialised modifiers such as %pS or %pB (described below)
+to avoid the need of providing an unhashed address that has to be interpreted
+post-hoc. If not possible, and the aim of printing the address is to provide
+more information for debugging, use %p and boot the kernel with the
+``no_hash_pointers`` parameter during debugging, which will print all %p
+addresses unmodified. If you *really* always want the unmodified address, see
+%px below.
+
+If (and only if) you are printing addresses as a content of a virtual file in
+e.g. procfs or sysfs (using e.g. seq_printf(), not printk()) read by a
+userspace process, use the %pK modifier described below instead of %p or %px.
Error Pointers
--------------
@@ -139,6 +151,11 @@ For printing kernel pointers which should be hidden from unprivileged
users. The behaviour of %pK depends on the kptr_restrict sysctl - see
Documentation/admin-guide/sysctl/kernel.rst for more details.
+This modifier is *only* intended when producing content of a file read by
+userspace from e.g. procfs or sysfs, not for dmesg. Please refer to the
+section about %p above for discussion about how to manage hashing pointers
+in printk().
+
Unmodified Addresses
--------------------
@@ -153,6 +170,13 @@ equivalent to %lx (or %lu). %px is preferred because it is more uniquely
grep'able. If in the future we need to modify the way the kernel handles
printing pointers we will be better equipped to find the call sites.
+Before using %px, consider if using %p is sufficient together with enabling the
+``no_hash_pointers`` kernel parameter during debugging sessions (see the %p
+description above). One valid scenario for %px might be printing information
+immediately before a panic, which prevents any sensitive information to be
+exploited anyway, and with %px there would be no need to reproduce the panic
+with no_hash_pointers.
+
Pointer Differences
-------------------
@@ -540,7 +564,7 @@ Flags bitfields such as page flags, gfp_flags
::
- %pGp referenced|uptodate|lru|active|private
+ %pGp referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff
%pGg GFP_USER|GFP_DMA32|GFP_NOWARN
%pGv read|exec|mayread|maywrite|mayexec|denywrite
diff --git a/MAINTAINERS b/MAINTAINERS
index 411f98d96a80..04e7de8c95be 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14535,7 +14535,7 @@ F: kernel/sched/psi.c
PRINTK
M: Petr Mladek <pmladek@suse.com>
-M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+M: Sergey Senozhatsky <senozhatsky@chromium.org>
R: Steven Rostedt <rostedt@goodmis.org>
R: John Ogness <john.ogness@linutronix.de>
S: Maintained
@@ -19388,7 +19388,7 @@ F: drivers/net/vrf.c
VSPRINTF
M: Petr Mladek <pmladek@suse.com>
M: Steven Rostedt <rostedt@goodmis.org>
-M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+M: Sergey Senozhatsky <senozhatsky@chromium.org>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Rasmus Villemoes <linux@rasmusvillemoes.dk>
S: Maintained
@@ -20039,7 +20039,7 @@ F: drivers/staging/media/zoran/
ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER
M: Minchan Kim <minchan@kernel.org>
M: Nitin Gupta <ngupta@vflare.org>
-R: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+R: Sergey Senozhatsky <senozhatsky@chromium.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/admin-guide/blockdev/zram.rst
@@ -20053,7 +20053,7 @@ F: drivers/tty/serial/zs.*
ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
M: Minchan Kim <minchan@kernel.org>
M: Nitin Gupta <ngupta@vflare.org>
-R: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+R: Sergey Senozhatsky <senozhatsky@chromium.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/vm/zsmalloc.rst
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 532f22637783..3c8d9bbb51cf 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -647,6 +647,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
{
struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
static unsigned int oops_count = 0;
+ static struct kmsg_dump_iter iter;
static bool panicking = false;
static DEFINE_SPINLOCK(lock);
unsigned long flags;
@@ -681,13 +682,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
return;
if (big_oops_buf) {
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
big_oops_buf, big_oops_buf_sz, &text_len);
rc = zip_oops(text_len);
}
if (rc != 0) {
- kmsg_dump_rewind(dumper);
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 3fe37495f63d..bf7d69625a2e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3001,7 +3001,7 @@ print_address(unsigned long addr)
static void
dump_log_buf(void)
{
- struct kmsg_dumper dumper = { .active = 1 };
+ struct kmsg_dump_iter iter;
unsigned char buf[128];
size_t len;
@@ -3013,9 +3013,9 @@ dump_log_buf(void)
catch_memory_errors = 1;
sync();
- kmsg_dump_rewind_nolock(&dumper);
+ kmsg_dump_rewind(&iter);
xmon_start_pagination();
- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) {
+ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) {
buf[len] = '\0';
printf("%s", buf);
}
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 6516ef1f8274..0224fcb36e22 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmsg_dump.h>
+#include <linux/spinlock.h>
#include <linux/console.h>
#include <linux/string.h>
#include <shared/init.h>
@@ -9,8 +10,11 @@
static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
+ static struct kmsg_dump_iter iter;
+ static DEFINE_SPINLOCK(lock);
static char line[1024];
struct console *con;
+ unsigned long flags;
size_t len = 0;
/* only dump kmsg when no console is available */
@@ -29,11 +33,18 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
if (con)
return;
+ if (!spin_trylock_irqsave(&lock, flags))
+ return;
+
+ kmsg_dump_rewind(&iter);
+
printf("kmsg_dump:\n");
- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) {
+ while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) {
line[len] = '\0';
printf("%s", line);
}
+
+ spin_unlock_irqrestore(&lock, flags);
}
static struct kmsg_dumper kmsg_dumper = {
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index b12d6827b222..92cb3f7d21d9 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1399,6 +1399,7 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
+ struct kmsg_dump_iter iter;
size_t bytes_written;
/* We are only interested in panics. */
@@ -1409,7 +1410,8 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (!bytes_written)
return;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 774970bfcf85..862c4a889234 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -52,6 +52,7 @@ static struct mtdoops_context {
int nextcount;
unsigned long *oops_page_used;
+ unsigned long oops_buf_busy;
void *oops_buf;
} oops_cxt;
@@ -180,6 +181,9 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
u32 *hdr;
int ret;
+ if (test_and_set_bit(0, &cxt->oops_buf_busy))
+ return;
+
/* Add mtdoops header to the buffer */
hdr = cxt->oops_buf;
hdr[0] = cxt->nextcount;
@@ -190,7 +194,7 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
record_size, &retlen, cxt->oops_buf);
if (ret == -EOPNOTSUPP) {
printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
- return;
+ goto out;
}
} else
ret = mtd_write(mtd, cxt->nextpage * record_size,
@@ -203,6 +207,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
memset(cxt->oops_buf, 0xff, record_size);
mtdoops_inc_counter(cxt);
+out:
+ clear_bit(0, &cxt->oops_buf_busy);
}
static void mtdoops_workfunc_write(struct work_struct *work)
@@ -271,13 +277,19 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
{
struct mtdoops_context *cxt = container_of(dumper,
struct mtdoops_context, dump);
+ struct kmsg_dump_iter iter;
/* Only dump oopses if dump_oops is set */
if (reason == KMSG_DUMP_OOPS && !dump_oops)
return;
- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
+ kmsg_dump_rewind(&iter);
+
+ if (test_and_set_bit(0, &cxt->oops_buf_busy))
+ return;
+ kmsg_dump_get_buffer(&iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
record_size - MTDOOPS_HEADER_SIZE, NULL);
+ clear_bit(0, &cxt->oops_buf_busy);
if (reason != KMSG_DUMP_OOPS) {
/* Panics must be written immediately */
@@ -394,6 +406,7 @@ static int __init mtdoops_init(void)
return -ENOMEM;
}
memset(cxt->oops_buf, 0xff, record_size);
+ cxt->oops_buf_busy = 0;
INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase);
INIT_WORK(&cxt->work_write, mtdoops_workfunc_write);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d963ae7902f9..b9614db48b1d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -385,6 +385,7 @@ void pstore_record_init(struct pstore_record *record,
static void pstore_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
+ struct kmsg_dump_iter iter;
unsigned long total = 0;
const char *why;
unsigned int part = 1;
@@ -405,6 +406,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
}
}
+ kmsg_dump_rewind(&iter);
+
oopscount++;
while (total < kmsg_bytes) {
char *dst;
@@ -435,7 +438,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
dst_size -= header_size;
/* Write dump contents. */
- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size,
+ if (!kmsg_dump_get_buffer(&iter, true, dst + header_size,
dst_size, &dump_size))
break;
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 3378bcbe585e..906521c2329c 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -30,6 +30,16 @@ enum kmsg_dump_reason {
};
/**
+ * struct kmsg_dump_iter - iterator for retrieving kernel messages
+ * @cur_seq: Points to the oldest message to dump
+ * @next_seq: Points after the newest message to dump
+ */
+struct kmsg_dump_iter {
+ u64 cur_seq;
+ u64 next_seq;
+};
+
+/**
* struct kmsg_dumper - kernel crash message dumper structure
* @list: Entry in the dumper list (private)
* @dump: Call into dumping code which will retrieve the data with
@@ -41,31 +51,19 @@ struct kmsg_dumper {
struct list_head list;
void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
enum kmsg_dump_reason max_reason;
- bool active;
bool registered;
-
- /* private state of the kmsg iterator */
- u32 cur_idx;
- u32 next_idx;
- u64 cur_seq;
- u64 next_seq;
};
#ifdef CONFIG_PRINTK
void kmsg_dump(enum kmsg_dump_reason reason);
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
- char *line, size_t size, size_t *len);
-
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
char *line, size_t size, size_t *len);
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
- char *buf, size_t size, size_t *len);
-
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
+bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
+ char *buf, size_t size, size_t *len_out);
-void kmsg_dump_rewind(struct kmsg_dumper *dumper);
+void kmsg_dump_rewind(struct kmsg_dump_iter *iter);
int kmsg_dump_register(struct kmsg_dumper *dumper);
@@ -77,30 +75,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
{
}
-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper,
- bool syslog, const char *line,
- size_t size, size_t *len)
-{
- return false;
-}
-
-static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
const char *line, size_t size, size_t *len)
{
return false;
}
-static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
char *buf, size_t size, size_t *len)
{
return false;
}
-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
-{
-}
-
-static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
{
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 405d24aa5156..1baa96a2ecb8 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2077,7 +2077,7 @@ static int kdb_dmesg(int argc, const char **argv)
int adjust = 0;
int n = 0;
int skip = 0;
- struct kmsg_dumper dumper = { .active = 1 };
+ struct kmsg_dump_iter iter;
size_t len;
char buf[201];
@@ -2102,8 +2102,8 @@ static int kdb_dmesg(int argc, const char **argv)
kdb_set(2, setargs);
}
- kmsg_dump_rewind_nolock(&dumper);
- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
+ kmsg_dump_rewind(&iter);
+ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL))
n++;
if (lines < 0) {
@@ -2135,8 +2135,8 @@ static int kdb_dmesg(int argc, const char **argv)
if (skip >= n || skip < 0)
return 0;
- kmsg_dump_rewind_nolock(&dumper);
- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
+ kmsg_dump_rewind(&iter);
+ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) {
if (skip) {
skip--;
continue;
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 3a8fd491758c..51615c909b2f 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -12,8 +12,6 @@
#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
-extern raw_spinlock_t logbuf_lock;
-
__printf(4, 0)
int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
@@ -21,7 +19,6 @@ int vprintk_store(int facility, int level,
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
void __printk_safe_enter(void);
void __printk_safe_exit(void);
@@ -56,10 +53,8 @@ void defer_console_output(void);
#else
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
-
/*
- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem
+ * In !PRINTK builds we still export console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
* printk-safe must preserve the existing local IRQ guarantees.
*/
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 575a34b88936..421c35571797 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -262,7 +262,7 @@ static void __up_console_sem(unsigned long ip)
* definitely not the perfect debug tool (we don't know if _WE_
* hold it and are racing, but it helps tracking those weird code
* paths in the console code where we end up in places I want
- * locked without the console sempahore held).
+ * locked without the console semaphore held).
*/
static int console_locked, console_suspended;
@@ -355,62 +355,50 @@ enum log_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
-/*
- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
- * within the scheduler's rq lock. It must be released before calling
- * console_unlock() or anything else that might wake up a process.
- */
-DEFINE_RAW_SPINLOCK(logbuf_lock);
-
-/*
- * Helper macros to lock/unlock logbuf_lock and switch between
- * printk-safe/unsafe modes.
- */
-#define logbuf_lock_irq() \
- do { \
- printk_safe_enter_irq(); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
-
-#define logbuf_unlock_irq() \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irq(); \
- } while (0)
-
-#define logbuf_lock_irqsave(flags) \
- do { \
- printk_safe_enter_irqsave(flags); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
-
-#define logbuf_unlock_irqrestore(flags) \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irqrestore(flags); \
- } while (0)
+/* syslog_lock protects syslog_* variables and write access to clear_seq. */
+static DEFINE_RAW_SPINLOCK(syslog_lock);
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
+/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
+/* All 3 protected by @console_sem. */
/* the next printk record to write to the console */
static u64 console_seq;
static u64 exclusive_console_stop_seq;
static unsigned long console_dropped;
-/* the next printk record to read after the last 'clear' command */
-static u64 clear_seq;
+struct latched_seq {
+ seqcount_latch_t latch;
+ u64 val[2];
+};
+
+/*
+ * The next printk record to read after the last 'clear' command. There are
+ * two copies (updated with seqcount_latch) so that reads can locklessly
+ * access a valid value. Writers are synchronized by @syslog_lock.
+ */
+static struct latched_seq clear_seq = {
+ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
+ .val[0] = 0,
+ .val[1] = 0,
+};
#ifdef CONFIG_PRINTK_CALLER
#define PREFIX_MAX 48
#else
#define PREFIX_MAX 32
#endif
-#define LOG_LINE_MAX (1024 - PREFIX_MAX)
+
+/* the maximum size of a formatted record (i.e. with prefix added per line) */
+#define CONSOLE_LOG_MAX 1024
+
+/* the maximum size allowed to be reserved for a record */
+#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX)
#define LOG_LEVEL(v) ((v) & 0x07)
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
@@ -452,6 +440,31 @@ bool printk_percpu_data_ready(void)
return __printk_percpu_data_ready;
}
+/* Must be called under syslog_lock. */
+static void latched_seq_write(struct latched_seq *ls, u64 val)
+{
+ raw_write_seqcount_latch(&ls->latch);
+ ls->val[0] = val;
+ raw_write_seqcount_latch(&ls->latch);
+ ls->val[1] = val;
+}
+
+/* Can be called from any context. */
+static u64 latched_seq_read_nolock(struct latched_seq *ls)
+{
+ unsigned int seq;
+ unsigned int idx;
+ u64 val;
+
+ do {
+ seq = raw_read_seqcount_latch(&ls->latch);
+ idx = seq & 0x1;
+ val = ls->val[idx];
+ } while (read_seqcount_latch_retry(&ls->latch, seq));
+
+ return val;
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -619,7 +632,7 @@ out:
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
- u64 seq;
+ atomic64_t seq;
struct ratelimit_state rs;
struct mutex lock;
char buf[CONSOLE_EXT_LOG_MAX];
@@ -719,27 +732,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (ret)
return ret;
- logbuf_lock_irq();
- if (!prb_read_valid(prb, user->seq, r)) {
+ printk_safe_enter_irq();
+ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
goto out;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb, user->seq, r));
+ prb_read_valid(prb, atomic64_read(&user->seq), r));
if (ret)
goto out;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
}
- if (r->info->seq != user->seq) {
+ if (r->info->seq != atomic64_read(&user->seq)) {
/* our last seen message is gone, return error and reset */
- user->seq = r->info->seq;
+ atomic64_set(&user->seq, r->info->seq);
ret = -EPIPE;
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
goto out;
}
@@ -748,8 +761,8 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
&r->text_buf[0], r->info->text_len,
&r->info->dev_info);
- user->seq = r->info->seq + 1;
- logbuf_unlock_irq();
+ atomic64_set(&user->seq, r->info->seq + 1);
+ printk_safe_exit_irq();
if (len > count) {
ret = -EINVAL;
@@ -784,11 +797,11 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
if (offset)
return -ESPIPE;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
switch (whence) {
case SEEK_SET:
/* the first record */
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
break;
case SEEK_DATA:
/*
@@ -796,16 +809,16 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- user->seq = clear_seq;
+ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
break;
case SEEK_END:
/* after the last record */
- user->seq = prb_next_seq(prb);
+ atomic64_set(&user->seq, prb_next_seq(prb));
break;
default:
ret = -EINVAL;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
return ret;
}
@@ -820,15 +833,15 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
- logbuf_lock_irq();
- if (prb_read_valid_info(prb, user->seq, &info, NULL)) {
+ printk_safe_enter_irq();
+ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
/* return error when data has vanished underneath us */
- if (info.seq != user->seq)
+ if (info.seq != atomic64_read(&user->seq))
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
else
ret = EPOLLIN|EPOLLRDNORM;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
return ret;
}
@@ -861,9 +874,9 @@ static int devkmsg_open(struct inode *inode, struct file *file)
prb_rec_init_rd(&user->record, &user->info,
&user->text_buf[0], sizeof(user->text_buf));
- logbuf_lock_irq();
- user->seq = prb_first_valid_seq(prb);
- logbuf_unlock_irq();
+ printk_safe_enter_irq();
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
+ printk_safe_exit_irq();
file->private_data = user;
return 0;
@@ -955,6 +968,9 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_SIZE(atomic_long_t);
VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);
+
+ VMCOREINFO_STRUCT_SIZE(latched_seq);
+ VMCOREINFO_OFFSET(latched_seq, val);
}
#endif
@@ -1421,6 +1437,50 @@ static size_t get_record_print_text_size(struct printk_info *info,
return ((prefix_len * line_count) + info->text_len + 1);
}
+/*
+ * Beginning with @start_seq, find the first record where it and all following
+ * records up to (but not including) @max_seq fit into @size.
+ *
+ * @max_seq is simply an upper bound and does not need to exist. If the caller
+ * does not require an upper bound, -1 can be used for @max_seq.
+ */
+static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
+ bool syslog, bool time)
+{
+ struct printk_info info;
+ unsigned int line_count;
+ size_t len = 0;
+ u64 seq;
+
+ /* Determine the size of the records up to @max_seq. */
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ if (info.seq >= max_seq)
+ break;
+ len += get_record_print_text_size(&info, line_count, syslog, time);
+ }
+
+ /*
+ * Adjust the upper bound for the next loop to avoid subtracting
+ * lengths that were never added.
+ */
+ if (seq < max_seq)
+ max_seq = seq;
+
+ /*
+ * Move first record forward until length fits into the buffer. Ignore
+ * newest messages that were not counted in the above cycle. Messages
+ * might appear and get lost in the meantime. This is a best effort
+ * that prevents an infinite loop that could occur with a retry.
+ */
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ if (len <= size || info.seq >= max_seq)
+ break;
+ len -= get_record_print_text_size(&info, line_count, syslog, time);
+ }
+
+ return seq;
+}
+
static int syslog_print(char __user *buf, int size)
{
struct printk_info info;
@@ -1428,19 +1488,21 @@ static int syslog_print(char __user *buf, int size)
char *text;
int len = 0;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
while (size > 0) {
size_t n;
size_t skip;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
+ raw_spin_lock(&syslog_lock);
if (!prb_read_valid(prb, syslog_seq, &r)) {
- logbuf_unlock_irq();
+ raw_spin_unlock(&syslog_lock);
+ printk_safe_exit_irq();
break;
}
if (r.info->seq != syslog_seq) {
@@ -1469,7 +1531,8 @@ static int syslog_print(char __user *buf, int size)
syslog_partial += n;
} else
n = 0;
- logbuf_unlock_irq();
+ raw_spin_unlock(&syslog_lock);
+ printk_safe_exit_irq();
if (!n)
break;
@@ -1492,34 +1555,26 @@ static int syslog_print(char __user *buf, int size)
static int syslog_print_all(char __user *buf, int size, bool clear)
{
struct printk_info info;
- unsigned int line_count;
struct printk_record r;
char *text;
int len = 0;
u64 seq;
bool time;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
time = printk_time;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
- len += get_record_print_text_size(&info, line_count, true, time);
+ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
+ size, true, time);
- /* move first record forward until length fits into the buffer */
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
- if (len <= size)
- break;
- len -= get_record_print_text_size(&info, line_count, true, time);
- }
-
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
len = 0;
prb_for_each_record(seq, prb, seq, &r) {
@@ -1532,20 +1587,23 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
break;
}
- logbuf_unlock_irq();
+ printk_saf