summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 10:32:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 10:32:08 -0700
commit537e62c865dcb9b91d07ed83f8615b71fa0b51bb (patch)
tree6874d3c223e11b6a03da735caab862c06412ede1
parent2e17ce1106e04a7f3a83796ec623881487f75dd3 (diff)
parent1c6fd59943a630f087b4b3089aefdcb0bf43cca0 (diff)
downloadlinux-537e62c865dcb9b91d07ed83f8615b71fa0b51bb.tar.gz
linux-537e62c865dcb9b91d07ed83f8615b71fa0b51bb.tar.bz2
linux-537e62c865dcb9b91d07ed83f8615b71fa0b51bb.zip
Merge tag 'printk-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux
Pull printk updates from Petr Mladek: - Offload writing printk() messages on consoles to per-console kthreads. It prevents soft-lockups when an extensive amount of messages is printed. It was observed, for example, during boot of large systems with a lot of peripherals like disks or network interfaces. It prevents live-lockups that were observed, for example, when messages about allocation failures were reported and a CPU handled consoles instead of reclaiming the memory. It was hard to solve even with rate limiting because it would need to take into account the amount of messages and the speed of all consoles. It is a must to have for real time. Otherwise, any printk() might break latency guarantees. The per-console kthreads allow to handle each console on its own speed. Slow consoles do not longer slow down faster ones. And printk() does not longer unpredictably slows down various code paths. There are situations when the kthreads are either not available or not reliable, for example, early boot, suspend, or panic. In these situations, printk() uses the legacy mode and tries to handle consoles immediately. - Add documentation for the printk index. * tag 'printk-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: printk, tracing: fix console tracepoint printk: remove @console_locked printk: extend console_lock for per-console locking printk: add kthread console printers printk: add functions to prefer direct printing printk: add pr_flush() printk: move buffer definitions into console_emit_next_record() caller printk: refactor and rework printing logic printk: add con_printk() macro for console details printk: call boot_delay_msec() in printk_delay() printk: get caller_id/timestamp after migration disable printk: wake waiters for safe and NMI contexts printk: wake up all waiters printk: add missing memory barrier to wake_up_klogd() printk: cpu sync always disable interrupts printk: rename cpulock functions printk/index: Printk index feature documentation MAINTAINERS: Add printk indexing maintainers on mention of printk_index
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/core-api/printk-index.rst137
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/tty/sysrq.c2
-rw-r--r--include/linux/console.h19
-rw-r--r--include/linux/printk.h82
-rw-r--r--kernel/hung_task.c11
-rw-r--r--kernel/panic.c4
-rw-r--r--kernel/printk/printk.c1205
-rw-r--r--kernel/rcu/tree_stall.h2
-rw-r--r--kernel/reboot.c14
-rw-r--r--kernel/watchdog.c4
-rw-r--r--kernel/watchdog_hld.c4
-rw-r--r--lib/dump_stack.c4
-rw-r--r--lib/nmi_backtrace.c4
15 files changed, 1174 insertions, 321 deletions
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 972d46a5ddf6..8a3fce1fe582 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -20,6 +20,7 @@ it.
workqueue
printk-basics
printk-formats
+ printk-index
symbol-namespaces
Data structures and low-level utilities
diff --git a/Documentation/core-api/printk-index.rst b/Documentation/core-api/printk-index.rst
new file mode 100644
index 000000000000..3062f37d119b
--- /dev/null
+++ b/Documentation/core-api/printk-index.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Printk Index
+============
+
+There are many ways how to monitor the state of the system. One important
+source of information is the system log. It provides a lot of information,
+including more or less important warnings and error messages.
+
+There are monitoring tools that filter and take action based on messages
+logged.
+
+The kernel messages are evolving together with the code. As a result,
+particular kernel messages are not KABI and never will be!
+
+It is a huge challenge for maintaining the system log monitors. It requires
+knowing what messages were updated in a particular kernel version and why.
+Finding these changes in the sources would require non-trivial parsers.
+Also it would require matching the sources with the binary kernel which
+is not always trivial. Various changes might be backported. Various kernel
+versions might be used on different monitored systems.
+
+This is where the printk index feature might become useful. It provides
+a dump of printk formats used all over the source code used for the kernel
+and modules on the running system. It is accessible at runtime via debugfs.
+
+The printk index helps to find changes in the message formats. Also it helps
+to track the strings back to the kernel sources and the related commit.
+
+
+User Interface
+==============
+
+The index of printk formats are split in into separate files. The files are
+named according to the binaries where the printk formats are built-in. There
+is always "vmlinux" and optionally also modules, for example::
+
+ /sys/kernel/debug/printk/index/vmlinux
+ /sys/kernel/debug/printk/index/ext4
+ /sys/kernel/debug/printk/index/scsi_mod
+
+Note that only loaded modules are shown. Also printk formats from a module
+might appear in "vmlinux" when the module is built-in.
+
+The content is inspired by the dynamic debug interface and looks like::
+
+ $> head -1 /sys/kernel/debug/printk/index/vmlinux; shuf -n 5 vmlinux
+ # <level[,flags]> filename:line function "format"
+ <5> block/blk-settings.c:661 disk_stack_limits "%s: Warning: Device %s is misaligned\n"
+ <4> kernel/trace/trace.c:8296 trace_create_file "Could not create tracefs '%s' entry\n"
+ <6> arch/x86/kernel/hpet.c:144 _hpet_print_config "hpet: %s(%d):\n"
+ <6> init/do_mounts.c:605 prepare_namespace "Waiting for root device %s...\n"
+ <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+, where the meaning is:
+
+ - :level: log level value: 0-7 for particular severity, -1 as default,
+ 'c' as continuous line without an explicit log level
+ - :flags: optional flags: currently only 'c' for KERN_CONT
+ - :filename\:line: source filename and line number of the related
+ printk() call. Note that there are many wrappers, for example,
+ pr_warn(), pr_warn_once(), dev_warn().
+ - :function: function name where the printk() call is used.
+ - :format: format string
+
+The extra information makes it a bit harder to find differences
+between various kernels. Especially the line number might change
+very often. On the other hand, it helps a lot to confirm that
+it is the same string or find the commit that is responsible
+for eventual changes.
+
+
+printk() Is Not a Stable KABI
+=============================
+
+Several developers are afraid that exporting all these implementation
+details into the user space will transform particular printk() calls
+into KABI.
+
+But it is exactly the opposite. printk() calls must _not_ be KABI.
+And the printk index helps user space tools to deal with this.
+
+
+Subsystem specific printk wrappers
+==================================
+
+The printk index is generated using extra metadata that are stored in
+a dedicated .elf section ".printk_index". It is achieved using macro
+wrappers doing __printk_index_emit() together with the real printk()
+call. The same technique is used also for the metadata used by
+the dynamic debug feature.
+
+The metadata are stored for a particular message only when it is printed
+using these special wrappers. It is implemented for the commonly
+used printk() calls, including, for example, pr_warn(), or pr_once().
+
+Additional changes are necessary for various subsystem specific wrappers
+that call the original printk() via a common helper function. These needs
+their own wrappers adding __printk_index_emit().
+
+Only few subsystem specific wrappers have been updated so far,
+for example, dev_printk(). As a result, the printk formats from
+some subsystes can be missing in the printk index.
+
+
+Subsystem specific prefix
+=========================
+
+The macro pr_fmt() macro allows to define a prefix that is printed
+before the string generated by the related printk() calls.
+
+Subsystem specific wrappers usually add even more complicated
+prefixes.
+
+These prefixes can be stored into the printk index metadata
+by an optional parameter of __printk_index_emit(). The debugfs
+interface might then show the printk formats including these prefixes.
+For example, drivers/acpi/osl.c contains::
+
+ #define pr_fmt(fmt) "ACPI: OSL: " fmt
+
+ static int __init acpi_no_auto_serialize_setup(char *str)
+ {
+ acpi_gbl_auto_serialize_methods = FALSE;
+ pr_info("Auto-serialization disabled\n");
+
+ return 1;
+ }
+
+This results in the following printk index entry::
+
+ <6> drivers/acpi/osl.c:1410 acpi_no_auto_serialize_setup "ACPI: auto-serialization disabled\n"
+
+It helps matching messages from the real log with printk index.
+Then the source file name, line number, and function name can
+be used to match the string with the source code.
diff --git a/MAINTAINERS b/MAINTAINERS
index 7af43b32e06d..309b45217022 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15923,7 +15923,9 @@ F: kernel/printk/
PRINTK INDEXING
R: Chris Down <chris@chrisdown.name>
S: Maintained
+F: Documentation/core-api/printk-index.rst
F: kernel/printk/index.c
+K: printk_index
PROC FILESYSTEM
L: linux-kernel@vger.kernel.org
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index bbfd004449b5..2884cd638d64 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -578,6 +578,7 @@ void __handle_sysrq(int key, bool check_mask)
rcu_sysrq_start();
rcu_read_lock();
+ printk_prefer_direct_enter();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
@@ -619,6 +620,7 @@ void __handle_sysrq(int key, bool check_mask)
pr_cont("\n");
console_loglevel = orig_log_level;
}
+ printk_prefer_direct_exit();
rcu_read_unlock();
rcu_sysrq_end();
diff --git a/include/linux/console.h b/include/linux/console.h
index 7cd758a4f44e..143653090c48 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,7 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/mutex.h>
struct vc_data;
struct console_font_op;
@@ -151,6 +152,24 @@ struct console {
int cflag;
uint ispeed;
uint ospeed;
+ u64 seq;
+ unsigned long dropped;
+ struct task_struct *thread;
+ bool blocked;
+
+ /*
+ * The per-console lock is used by printing kthreads to synchronize
+ * this console with callers of console_lock(). This is necessary in
+ * order to allow printing kthreads to run in parallel to each other,
+ * while each safely accessing the @blocked field and synchronizing
+ * against direct printing via console_lock/console_unlock.
+ *
+ * Note: For synchronizing against direct printing via
+ * console_trylock/console_unlock, see the static global
+ * variable @console_kthreads_active.
+ */
+ struct mutex lock;
+
void *data;
struct console *next;
};
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 1522df223c0f..cd26aab0ab2a 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -170,6 +170,11 @@ extern void __printk_safe_exit(void);
#define printk_deferred_enter __printk_safe_enter
#define printk_deferred_exit __printk_safe_exit
+extern void printk_prefer_direct_enter(void);
+extern void printk_prefer_direct_exit(void);
+
+extern bool pr_flush(int timeout_ms, bool reset_on_progress);
+
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
* with all other unrelated printk_ratelimit() callsites. Instead use
@@ -220,6 +225,19 @@ static inline void printk_deferred_exit(void)
{
}
+static inline void printk_prefer_direct_enter(void)
+{
+}
+
+static inline void printk_prefer_direct_exit(void)
+{
+}
+
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+ return true;
+}
+
static inline int printk_ratelimit(void)
{
return 0;
@@ -277,45 +295,57 @@ static inline void printk_trigger_flush(void)
#endif
#ifdef CONFIG_SMP
-extern int __printk_cpu_trylock(void);
-extern void __printk_wait_on_cpu_lock(void);
-extern void __printk_cpu_unlock(void);
+extern int __printk_cpu_sync_try_get(void);
+extern void __printk_cpu_sync_wait(void);
+extern void __printk_cpu_sync_put(void);
+
+#else
+
+#define __printk_cpu_sync_try_get() true
+#define __printk_cpu_sync_wait()
+#define __printk_cpu_sync_put()
+#endif /* CONFIG_SMP */
/**
- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
- * lock and disable interrupts.
+ * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk
+ * cpu-reentrant spinning lock.
* @flags: Stack-allocated storage for saving local interrupt state,
- * to be passed to printk_cpu_unlock_irqrestore().
+ * to be passed to printk_cpu_sync_put_irqrestore().
*
* If the lock is owned by another CPU, spin until it becomes available.
* Interrupts are restored while spinning.
+ *
+ * CAUTION: This function must be used carefully. It does not behave like a
+ * typical lock. Here are important things to watch out for...
+ *
+ * * This function is reentrant on the same CPU. Therefore the calling
+ * code must not assume exclusive access to data if code accessing the
+ * data can run reentrant or within NMI context on the same CPU.
+ *
+ * * If there exists usage of this function from NMI context, it becomes
+ * unsafe to perform any type of locking or spinning to wait for other
+ * CPUs after calling this function from any context. This includes
+ * using spinlocks or any other busy-waiting synchronization methods.
*/
-#define printk_cpu_lock_irqsave(flags) \
- for (;;) { \
- local_irq_save(flags); \
- if (__printk_cpu_trylock()) \
- break; \
- local_irq_restore(flags); \
- __printk_wait_on_cpu_lock(); \
+#define printk_cpu_sync_get_irqsave(flags) \
+ for (;;) { \
+ local_irq_save(flags); \
+ if (__printk_cpu_sync_try_get()) \
+ break; \
+ local_irq_restore(flags); \
+ __printk_cpu_sync_wait(); \
}
/**
- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
- * lock and restore interrupts.
- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning
+ * lock and restore interrupts.
+ * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave().
*/
-#define printk_cpu_unlock_irqrestore(flags) \
+#define printk_cpu_sync_put_irqrestore(flags) \
do { \
- __printk_cpu_unlock(); \
+ __printk_cpu_sync_put(); \
local_irq_restore(flags); \
- } while (0) \
-
-#else
-
-#define printk_cpu_lock_irqsave(flags) ((void)flags)
-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
-
-#endif /* CONFIG_SMP */
+ } while (0)
extern int kptr_restrict;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 52501e5f7655..02a65d554340 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -127,6 +127,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
* complain:
*/
if (sysctl_hung_task_warnings) {
+ printk_prefer_direct_enter();
+
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -142,6 +144,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
if (sysctl_hung_task_all_cpu_backtrace)
hung_task_show_all_bt = true;
+
+ printk_prefer_direct_exit();
}
touch_nmi_watchdog();
@@ -204,12 +208,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
}
unlock:
rcu_read_unlock();
- if (hung_task_show_lock)
+ if (hung_task_show_lock) {
+ printk_prefer_direct_enter();
debug_show_all_locks();
+ printk_prefer_direct_exit();
+ }
if (hung_task_show_all_bt) {
hung_task_show_all_bt = false;
+ printk_prefer_direct_enter();
trigger_all_cpu_backtrace();
+ printk_prefer_direct_exit();
}
if (hung_task_call_panic)
diff --git a/kernel/panic.c b/kernel/panic.c
index 8355b19676f8..6737b2332275 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -579,6 +579,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
{
disable_trace_on_warning();
+ printk_prefer_direct_enter();
+
if (file)
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
raw_smp_processor_id(), current->pid, file, line,
@@ -608,6 +610,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
+
+ printk_prefer_direct_exit();
}
#ifndef __WARN_FLAGS
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index da03c15ecc89..a3e1035929b0 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -224,6 +224,33 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
static int nr_ext_console_drivers;
/*
+ * Used to synchronize printing kthreads against direct printing via
+ * console_trylock/console_unlock.
+ *
+ * Values:
+ * -1 = console kthreads atomically blocked (via global trylock)
+ * 0 = no kthread printing, console not locked (via trylock)
+ * >0 = kthread(s) actively printing
+ *
+ * Note: For synchronizing against direct printing via
+ * console_lock/console_unlock, see the @lock variable in
+ * struct console.
+ */
+static atomic_t console_kthreads_active = ATOMIC_INIT(0);
+
+#define console_kthreads_atomic_tryblock() \
+ (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0)
+#define console_kthreads_atomic_unblock() \
+ atomic_cmpxchg(&console_kthreads_active, -1, 0)
+#define console_kthreads_atomically_blocked() \
+ (atomic_read(&console_kthreads_active) == -1)
+
+#define console_kthread_printing_tryenter() \
+ atomic_inc_unless_negative(&console_kthreads_active)
+#define console_kthread_printing_exit() \
+ atomic_dec(&console_kthreads_active)
+
+/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
*/
@@ -271,19 +298,49 @@ static bool panic_in_progress(void)
}
/*
- * This is used for debugging the mess that is the VT code by
- * keeping track if we have the console semaphore held. It's
- * definitely not the perfect debug tool (we don't know if _WE_
- * hold it and are racing, but it helps tracking those weird code
- * paths in the console code where we end up in places I want
- * locked without the console semaphore held).
+ * Tracks whether kthread printers are all blocked. A value of true implies
+ * that the console is locked via console_lock() or the console is suspended.
+ * Writing to this variable requires holding @console_sem.
*/
-static int console_locked, console_suspended;
+static bool console_kthreads_blocked;
+
+/*
+ * Block all kthread printers from a schedulable context.
+ *
+ * Requires holding @console_sem.
+ */
+static void console_kthreads_block(void)
+{
+ struct console *con;
+
+ for_each_console(con) {
+ mutex_lock(&con->lock);
+ con->blocked = true;
+ mutex_unlock(&con->lock);
+ }
+
+ console_kthreads_blocked = true;
+}
/*
- * If exclusive_console is non-NULL then only this console is to be printed to.
+ * Unblock all kthread printers from a schedulable context.
+ *
+ * Requires holding @console_sem.
*/
-static struct console *exclusive_console;
+static void console_kthreads_unblock(void)
+{
+ struct console *con;
+
+ for_each_console(con) {
+ mutex_lock(&con->lock);
+ con->blocked = false;
+ mutex_unlock(&con->lock);
+ }
+
+ console_kthreads_blocked = false;
+}
+
+static int console_suspended;
/*
* Array of consoles built from command line options (console=)
@@ -366,7 +423,75 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);
+/*
+ * A flag to signify if printk_activate_kthreads() has already started the
+ * kthread printers. If true, any later registered consoles must start their
+ * own kthread directly. The flag is write protected by the console_lock.
+ */
+static bool printk_kthreads_available;
+
#ifdef CONFIG_PRINTK
+static atomic_t printk_prefer_direct = ATOMIC_INIT(0);
+
+/**
+ * printk_prefer_direct_enter - cause printk() calls to attempt direct
+ * printing to all enabled consoles
+ *
+ * Since it is not possible to call into the console printing code from any
+ * context, there is no guarantee that direct printing will occur.
+ *
+ * This globally effects all printk() callers.
+ *
+ * Context: Any context.
+ */
+void printk_prefer_direct_enter(void)
+{
+ atomic_inc(&printk_prefer_direct);
+}
+
+/**
+ * printk_prefer_direct_exit - restore printk() behavior
+ *
+ * Context: Any context.
+ */
+void printk_prefer_direct_exit(void)
+{
+ WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0);
+}
+
+/*
+ * Calling printk() always wakes kthread printers so that they can
+ * flush the new message to their respective consoles. Also, if direct
+ * printing is allowed, printk() tries to flush the messages directly.
+ *
+ * Direct printing is allowed in situations when the kthreads
+ * are not available or the system is in a problematic state.
+ *
+ * See the implementation about possible races.
+ */
+static inline bool allow_direct_printing(void)
+{
+ /*
+ * Checking kthread availability is a possible race because the
+ * kthread printers can become permanently disabled during runtime.
+ * However, doing that requires holding the console_lock, so any
+ * pending messages will be direct printed by console_unlock().
+ */
+ if (!printk_kthreads_available)
+ return true;
+
+ /*
+ * Prefer direct printing when the system is in a problematic state.
+ * The context that sets this state will always see the updated value.
+ * The other contexts do not care. Anyway, direct printing is just a
+ * best effort. The direct output is only possible when console_lock
+ * is not already taken and no kthread printers are actively printing.
+ */
+ return (system_state > SYSTEM_RUNNING ||
+ oops_in_progress ||
+ atomic_read(&printk_prefer_direct));
+}
+
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -374,12 +499,6 @@ static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
-/* All 3 protected by @console_sem. */
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
struct latched_seq {
seqcount_latch_t latch;
u64 val[2];
@@ -405,6 +524,9 @@ static struct latched_seq clear_seq = {
/* the maximum size of a formatted record (i.e. with prefix added per line) */
#define CONSOLE_LOG_MAX 1024
+/* the maximum size for a dropped text message */
+#define DROPPED_TEXT_MAX 64
+
/* the maximum size allowed to be reserved for a record */
#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX)
@@ -746,8 +868,19 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
goto out;
}
+ /*
+ * Guarantee this task is visible on the waitqueue before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * prepare_to_wait_event() pairs with the full memory barrier
+ * within wq_has_sleeper().
+ *
+ * This pairs with __wake_up_klogd:A.
+ */
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb, atomic64_read(&user->seq), r));
+ prb_read_valid(prb,
+ atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */
if (ret)
goto out;
}
@@ -1513,7 +1646,18 @@ static int syslog_print(char __user *buf, int size)
seq = syslog_seq;
mutex_unlock(&syslog_lock);
- len = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL));
+ /*
+ * Guarantee this task is visible on the waitqueue before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * prepare_to_wait_event() pairs with the full memory barrier
+ * within wq_has_sleeper().
+ *
+ * This pairs with __wake_up_klogd:A.
+ */
+ len = wait_event_interruptible(log_wait,
+ prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
mutex_lock(&syslog_lock);
if (len)
@@ -1911,47 +2055,24 @@ static int console_trylock_spinning(void)
}
/*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
+ * Call the specified console driver, asking it to write out the specified
+ * text and length. If @dropped_text is non-NULL and any records have been
+ * dropped, a dropped message will be written out first.
*/
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len)
+static void call_console_driver(struct console *con, const char *text, size_t len,
+ char *dropped_text)
{
- static char dropped_text[64];
- size_t dropped_len = 0;
- struct console *con;
-
- trace_console_rcuidle(text, len);
+ size_t dropped_len;
- if (!console_drivers)
- return;
-
- if (console_dropped) {
- dropped_len = snprintf(dropped_text, sizeof(dropped_text),
+ if (con->dropped && dropped_text) {
+ dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
"** %lu printk messages dropped **\n",
- console_dropped);
- console_dropped = 0;
+ con->dropped);
+ con->dropped = 0;
+ con->write(con, dropped_text, dropped_len);
}
- for_each_console(con) {
- if (exclusive_console && con != exclusive_console)
- continue;
- if (!(con->flags & CON_ENABLED))
- continue;
- if (!con->write)
- continue;
- if (!cpu_online(smp_processor_id()) &&
- !(con->flags & CON_ANYTIME))
- continue;
- if (con->flags & CON_EXTENDED)
- con->write(con, ext_text, ext_len);
- else {
- if (dropped_len)
- con->write(con, dropped_text, dropped_len);
- con->write(con, text, len);
- }
- }
+ con->write(con, text, len);
}
/*
@@ -2026,8 +2147,10 @@ static u8 *__printk_recursion_counter(void)
int printk_delay_msec __read_mostly;
-static inline void printk_delay(void)
+static inline void printk_delay(int level)
{
+ boot_delay_msec(level);
+
if (unlikely(printk_delay_msec)) {
int m = printk_delay_msec;
@@ -2041,7 +2164,7 @@ static inline void printk_delay(void)
static inline u32 printk_caller_id(void)
{
return in_task() ? task_pid_nr(current) :
- 0x80000000 + raw_smp_processor_id();
+ 0x80000000 + smp_processor_id();
}
/**
@@ -2115,6 +2238,8 @@ static u16 printk_sprint(char *text, u16 size, int facility,
}
}
+ trace_console_rcuidle(text, text_len);
+
return text_len;
}
@@ -2123,7 +2248,6 @@ int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
const char *fmt, va_list args)
{
- const u32 caller_id = printk_caller_id();
struct prb_reserved_entry e;
enum printk_info_flags flags = 0;
struct printk_record r;
@@ -2133,10 +2257,14 @@ int vprintk_store(int facility, int level,
u8 *recursion_ptr;
u16 reserve_size;
va_list args2;
+ u32 caller_id;
u16 text_len;
int ret = 0;
u64 ts_nsec;
+ if (!printk_enter_irqsave(recursion_ptr, irqflags))
+ return 0;
+
/*
* Since the duration of printk() can vary depending on the message
* and state of the ringbuffer, grab the timestamp now so that it is
@@ -2145,8 +2273,7 @@ int vprintk_store(int facility, int level,
*/
ts_nsec = local_clock();
- if (!printk_enter_irqsave(recursion_ptr, irqflags))
- return 0;
+ caller_id = printk_caller_id();
/*
* The sprintf needs to come first since the syslog prefix might be
@@ -2250,23 +2377,25 @@ asmlinkage int vprintk_emit(int facility, int level,
in_sched = true;
}
- boot_delay_msec(level);
- printk_delay();
+ printk_delay(level);
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
/* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (!in_sched && allow_direct_printing()) {
/*
- * Disable preemption to avoid being preempted while holding
- * console_sem which would prevent anyone from printing to
- * console
+ * The caller may be holding system-critical or
+ * timing-sensitive locks. Disable preemption during direct
+ * printing of all remaining records to all consoles so that
+ * this context can return as soon as possible. Hopefully
+ * another printk() caller will take over the printing.
*/
preempt_disable();
/*
* Try to acquire and then immediately release the console
- * semaphore. The release will print out buffers and wake up
- * /dev/kmsg and syslog() users.
+ * semaphore. The release will print out buffers. With the
+ * spinning variant, this context tries to take over the
+ * printing from another printing context.
*/
if (console_trylock_spinning())
console_unlock();
@@ -2297,18 +2426,21 @@ asmlinkage __visible int _printk(const char *fmt, ...)
}
EXPORT_SYMBOL(_printk);
+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
+
+static void printk_start_kthread(struct console *con);
+
#else /* CONFIG_PRINTK */
#define CONSOLE_LOG_MAX 0
+#define DROPPED_TEXT_MAX 0
#define printk_time false
#define prb_read_valid(rb, seq, r) false
#define prb_first_valid_seq(rb) 0
+#define prb_next_seq(rb) 0
static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
static size_t record_print_text(const struct printk_record *r,
bool syslog, bool time)
@@ -2325,9 +2457,14 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
struct dev_printk_info *dev_info) { return 0; }
static void console_lock_spinning_enable(void) { }
static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len) {}
+static void call_console_driver(struct console *con, const char *text, size_t len,
+ char *dropped_text)
+{
+}
static bool suppress_message_printing(int level) { return false; }
+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
+static void printk_start_kthread(struct console *con) { }
+static bool allow_direct_printing(void) { return true; }
#endif /* CONFIG_PRINTK */
@@ -2515,6 +2652,7 @@ void suspend_console(void)
if (!console_suspend_enabled)
return;
pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
+ pr_flush(1000, true);
console_lock();
console_suspended = 1;
up_console_sem();
@@ -2527,6 +2665,7 @@ void resume_console(void)
down_console_sem();
console_su