diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 6 | ||||
| -rw-r--r-- | kernel/bpf/trampoline.c | 3 | ||||
| -rw-r--r-- | kernel/events/core.c | 2 | ||||
| -rw-r--r-- | kernel/fork.c | 37 | ||||
| -rw-r--r-- | kernel/irq/msi.c | 7 | ||||
| -rw-r--r-- | kernel/ksysfs.c | 18 | ||||
| -rw-r--r-- | kernel/params.c | 2 | ||||
| -rw-r--r-- | kernel/resource.c | 17 | ||||
| -rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 28 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 44 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 104 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 31 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 16 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 66 | ||||
| -rw-r--r-- | kernel/trace/trace_events_hist.c | 190 | ||||
| -rw-r--r-- | kernel/trace/trace_events_synth.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_events_trigger.c | 19 | ||||
| -rw-r--r-- | kernel/trace/trace_events_user.c | 1 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_osnoise.c | 244 | ||||
| -rw-r--r-- | kernel/trace/trace_output.c | 71 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.c | 2 |
25 files changed, 695 insertions, 224 deletions
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 84b2d9dba79a..ece9870cab68 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, refcount_set(&kvalue->refcnt, 1); bpf_map_inc(map); - set_memory_ro((long)st_map->image, 1); - set_memory_x((long)st_map->image, 1); + set_memory_rox((long)st_map->image, 1); err = st_ops->reg(kdata); if (likely(!err)) { /* Pair with smp_load_acquire() during lookup_elem(). diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f98dec6e90f..6cca66b39d01 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -868,8 +868,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins list_add_tail(&pack->list, &pack_list); set_vm_flush_reset_perms(pack->ptr); - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); return pack; } @@ -887,8 +886,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) if (ptr) { bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); - set_memory_x((unsigned long)ptr, size / PAGE_SIZE); + set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); } goto out; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d6395215b849..11f5ec0b8016 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -468,8 +468,7 @@ again: if (err < 0) goto out; - set_memory_ro((long)im->image, 1); - set_memory_x((long)im->image, 1); + set_memory_rox((long)im->image, 1); WARN_ON(tr->cur_image && tr->selector == 0); WARN_ON(!tr->cur_image && tr->selector); diff --git a/kernel/events/core.c b/kernel/events/core.c index e47914ac8732..eacc3702654d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7493,7 +7493,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) return pud_leaf_size(pud); pmdp = pmd_offset_lockless(pudp, pud, addr); - pmd = READ_ONCE(*pmdp); + pmd = pmdp_get_lockless(pmdp); if (!pmd_present(pmd)) return 0; diff --git a/kernel/fork.c b/kernel/fork.c index 7a08025d2c99..9f7fe3541897 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2607,11 +2607,6 @@ struct task_struct * __init fork_idle(int cpu) return task; } -struct mm_struct *copy_init_mm(void) -{ - return dup_mm(NULL, &init_mm); -} - /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. @@ -3030,10 +3025,27 @@ static void sighand_ctor(void *data) init_waitqueue_head(&sighand->signalfd_wqh); } -void __init proc_caches_init(void) +void __init mm_cache_init(void) { unsigned int mm_size; + /* + * The mm_cpumask is located at the end of mm_struct, and is + * dynamically sized based on the maximum CPU number this system + * can have, taking hotplug into account (nr_cpu_ids). + */ + mm_size = sizeof(struct mm_struct) + cpumask_size(); + + mm_cachep = kmem_cache_create_usercopy("mm_struct", + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), + NULL); +} + +void __init proc_caches_init(void) +{ sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| @@ -3051,19 +3063,6 @@ void __init proc_caches_init(void) SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); - /* - * The mm_cpumask is located at the end of mm_struct, and is - * dynamically sized based on the maximum CPU number this system - * can have, taking hotplug into account (nr_cpu_ids). - */ - mm_size = sizeof(struct mm_struct) + cpumask_size(); - - mm_cachep = kmem_cache_create_usercopy("mm_struct", - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, - offsetof(struct mm_struct, saved_auxv), - sizeof_field(struct mm_struct, saved_auxv), - NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); nsproxy_cache_init(); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index bd4d4dd626b4..955267bbc2be 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -165,7 +165,8 @@ static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) unsigned int hwsize; if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || - !dev->msi.data->__domains[ctrl->domid].domain)) + (dev->msi.domain && + !dev->msi.data->__domains[ctrl->domid].domain))) return false; hwsize = msi_domain_get_hwsize(dev, ctrl->domid); @@ -609,8 +610,8 @@ static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid info = domain->host_data; return info->hwsize; } - /* No domain, no size... */ - return 0; + /* No domain, default to MSI_XA_DOMAIN_SIZE */ + return MSI_XA_DOMAIN_SIZE; } static inline void irq_chip_write_msi_msg(struct irq_data *data, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 65dba9076f31..2df00b789b90 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -6,6 +6,7 @@ * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org> */ +#include <asm/byteorder.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/sysfs.h> @@ -20,6 +21,14 @@ #include <linux/rcupdate.h> /* rcu_expedited and rcu_normal */ +#if defined(__LITTLE_ENDIAN) +#define CPU_BYTEORDER_STRING "little" +#elif defined(__BIG_ENDIAN) +#define CPU_BYTEORDER_STRING "big" +#else +#error Unknown byteorder +#endif + #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -34,6 +43,14 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj, } KERNEL_ATTR_RO(uevent_seqnum); +/* cpu byteorder */ +static ssize_t cpu_byteorder_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", CPU_BYTEORDER_STRING); +} +KERNEL_ATTR_RO(cpu_byteorder); + #ifdef CONFIG_UEVENT_HELPER /* uevent helper program, used during early boot */ static ssize_t uevent_helper_show(struct kobject *kobj, @@ -215,6 +232,7 @@ EXPORT_SYMBOL_GPL(kernel_kobj); static struct attribute * kernel_attrs[] = { &fscaps_attr.attr, &uevent_seqnum_attr.attr, + &cpu_byteorder_attr.attr, #ifdef CONFIG_UEVENT_HELPER &uevent_helper_attr.attr, #endif diff --git a/kernel/params.c b/kernel/params.c index a06f80c56f19..14d66070757b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -926,7 +926,7 @@ static const struct sysfs_ops module_sysfs_ops = { .store = module_attr_store, }; -static int uevent_filter(struct kobject *kobj) +static int uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); diff --git a/kernel/resource.c b/kernel/resource.c index 82ed54cd1f0d..ddbbacb9fb50 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -888,7 +888,7 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) if (conflict->end > new->end) new->end = conflict->end; - printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name); + pr_info("Expanded resource %s due to conflict with %s\n", new->name, conflict->name); } write_unlock(&resource_lock); } @@ -1283,9 +1283,7 @@ void __release_region(struct resource *parent, resource_size_t start, write_unlock(&resource_lock); - printk(KERN_WARNING "Trying to free nonexistent resource " - "<%016llx-%016llx>\n", (unsigned long long)start, - (unsigned long long)end); + pr_warn("Trying to free nonexistent resource <%pa-%pa>\n", &start, &end); } EXPORT_SYMBOL(__release_region); @@ -1658,6 +1656,7 @@ __setup("reserve=", reserve_setup); int iomem_map_sanity_check(resource_size_t addr, unsigned long size) { struct resource *p = &iomem_resource; + resource_size_t end = addr + size - 1; int err = 0; loff_t l; @@ -1667,12 +1666,12 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) * We can probably skip the resources without * IORESOURCE_IO attribute? */ - if (p->start >= addr + size) + if (p->start > end) continue; if (p->end < addr) continue; if (PFN_DOWN(p->start) <= PFN_DOWN(addr) && - PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1)) + PFN_DOWN(p->end) >= PFN_DOWN(end)) continue; /* * if a resource is "BUSY", it's not a hardware resource @@ -1683,10 +1682,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) if (p->flags & IORESOURCE_BUSY) continue; - printk(KERN_WARNING "resource sanity check: requesting [mem %#010llx-%#010llx], which spans more than %s %pR\n", - (unsigned long long)addr, - (unsigned long long)(addr + size - 1), - p->name, p); + pr_warn("resource sanity check: requesting [mem %pa-%pa], which spans more than %s %pR\n", + &addr, &end, p->name, p); err = -1; break; } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2c6611c13f99..e95c8fe478cd 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -375,6 +375,7 @@ config SCHED_TRACER config HWLAT_TRACER bool "Tracer to detect hardware latencies (like SMIs)" select GENERIC_TRACER + select TRACER_MAX_TRACE help This tracer, when enabled will create one or more kernel threads, depending on what the cpumask file is set to, which each thread @@ -410,6 +411,7 @@ config HWLAT_TRACER config OSNOISE_TRACER bool "OS Noise tracer" select GENERIC_TRACER + select TRACER_MAX_TRACE help In the context of high-performance computing (HPC), the Operating System Noise (osnoise) refers to the interference experienced by an diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e842f68b9a5..442438b93fe9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -163,7 +163,7 @@ static void ftrace_sync_ipi(void *data) static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic, RCU, or per CPU ops, or we force list func, + * If this is a dynamic or RCU ops, or we force list func, * then it needs to call the list anyway. */ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || @@ -2762,6 +2762,19 @@ void __weak ftrace_arch_code_modify_post_process(void) { } +static int update_ftrace_func(ftrace_func_t func) +{ + static ftrace_func_t save_func; + + /* Avoid updating if it hasn't changed */ + if (func == save_func) + return 0; + + save_func = func; + + return ftrace_update_ftrace_func(func); +} + void ftrace_modify_all_code(int command) { int update = command & FTRACE_UPDATE_TRACE_FUNC; @@ -2782,7 +2795,7 @@ void ftrace_modify_all_code(int command) * traced. */ if (update) { - err = ftrace_update_ftrace_func(ftrace_ops_list_func); + err = update_ftrace_func(ftrace_ops_list_func); if (FTRACE_WARN_ON(err)) return; } @@ -2798,7 +2811,7 @@ void ftrace_modify_all_code(int command) /* If irqs are disabled, we are in stop machine */ if (!irqs_disabled()) smp_call_function(ftrace_sync_ipi, NULL, 1); - err = ftrace_update_ftrace_func(ftrace_trace_function); + err = update_ftrace_func(ftrace_trace_function); if (FTRACE_WARN_ON(err)) return; } @@ -3070,8 +3083,6 @@ out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the per_cpu - * ops. */ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* @@ -4192,6 +4203,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) } found = 1; } + cond_resched(); } while_for_each_ftrace_rec(); out_unlock: mutex_unlock(&ftrace_lock); @@ -7518,8 +7530,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true - * if PER_CPU is set, then ftrace_function_local_disable() - * must be false * Otherwise test if the ip matches the ops filter * * If any of the above fails then the op->func() is not executed. @@ -7569,8 +7579,8 @@ NOKPROBE_SYMBOL(arch_ftrace_ops_list_func); /* * If there's only one function registered but it does not support - * recursion, needs RCU protection and/or requires per cpu handling, then - * this function will be called by the mcount trampoline. + * recursion, needs RCU protection, then this function will be called + * by the mcount trampoline. */ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b21bf14bae9b..c366a0a9ddba 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2062,8 +2062,10 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *pages = &cpu_buffer->new_pages; int retries, success; + unsigned long flags; - raw_spin_lock_irq(&cpu_buffer->reader_lock); + /* Can be called at early boot up, where interrupts must not been enabled */ + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* * We are holding the reader lock, so the reader page won't be swapped * in the ring buffer. Now we are racing with the writer trying to @@ -2120,7 +2122,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) * tracing */ RB_WARN_ON(cpu_buffer, !success); - raw_spin_unlock_irq(&cpu_buffer->reader_lock); + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); /* free pages if they weren't inserted */ if (!success) { @@ -2248,8 +2250,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { - schedule_work_on(cpu, - &cpu_buffer->update_pages_work); + /* Run directly if possible. */ + migrate_disable(); + if (cpu != smp_processor_id()) { + migrate_enable(); + schedule_work_on(cpu, + &cpu_buffer->update_pages_work); + } else { + update_pages_handler(&cpu_buffer->update_pages_work); + migrate_enable(); + } } } @@ -2298,9 +2308,17 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { - schedule_work_on(cpu_id, - &cpu_buffer->update_pages_work); - wait_for_completion(&cpu_buffer->update_done); + /* Run directly if possible. */ + migrate_disable(); + if (cpu_id == smp_processor_id()) { + rb_update_pages(cpu_buffer); + migrate_enable(); + } else { + migrate_enable(); + schedule_work_on(cpu_id, + &cpu_buffer->update_pages_work); + wait_for_completion(&cpu_buffer->update_done); + } } cpu_buffer->nr_pages_to_update = 0; @@ -3180,8 +3198,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event) event->time_delta = 1; } -static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, - struct ring_buffer_event *event) +static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) { local_inc(&cpu_buffer->entries); rb_end_commit(cpu_buffer); @@ -3383,15 +3400,14 @@ void ring_buffer_nest_end(struct trace_buffer *buffer) * * Must be paired with ring_buffer_lock_reserve. */ -int ring_buffer_unlock_commit(struct trace_buffer *buffer, - struct ring_buffer_event *event) +int ring_buffer_unlock_commit(struct trace_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; int cpu = raw_smp_processor_id(); cpu_buffer = buffer->buffers[cpu]; - rb_commit(cpu_buffer, event); + rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); @@ -3977,7 +3993,7 @@ int ring_buffer_write(struct trace_buffer *buffer, memcpy(body, data, length); - rb_commit(cpu_buffer, event); + rb_commit(cpu_buffer); rb_wakeups(buffer, cpu_buffer); @@ -5998,7 +6014,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) } out: - ring_buffer_unlock_commit(data->buffer, event); + ring_buffer_unlock_commit(data->buffer); return 0; } diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 78e576575b79..aef34673d79d 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -258,7 +258,7 @@ static void ring_buffer_producer(void) hit++; entry = ring_buffer_event_data(event); *entry = smp_processor_id(); - ring_buffer_unlock_commit(buffer, event); + ring_buffer_unlock_commit(buffer); } } end_time = ktime_get(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5cfc95a52bc3..6d7ef130f57e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -19,7 +19,6 @@ #include <linux/kallsyms.h> #include <linux/security.h> #include <linux/seq_file.h> -#include <linux/notifier.h> #include <linux/irqflags.h> #include <linux/debugfs.h> #include <linux/tracefs.h> @@ -85,7 +84,7 @@ void __init disable_tracing_selftest(const char *reason) #endif /* Pipe tracepoints to printk */ -struct trace_iterator *tracepoint_print_iter; +static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); @@ -999,7 +998,7 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev /* ring_buffer_unlock_commit() enables preemption */ preempt_enable_notrace(); } else - ring_buffer_unlock_commit(buffer, event); + ring_buffer_unlock_commit(buffer); } /** @@ -1421,6 +1420,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) return false; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); +#define free_snapshot(tr) do { } while (0) #endif /* CONFIG_TRACER_SNAPSHOT */ void tracer_tracing_off(struct trace_array *tr) @@ -1692,6 +1692,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) } unsigned long __read_mostly tracing_thresh; + +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops; #ifdef LATENCY_FS_NOTIFY @@ -1748,18 +1750,14 @@ void latency_fsnotify(struct trace_array *tr) irq_work_queue(&tr->fsnotify_irqwork); } -#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ d_tracer, &tr->max_latency, &tracing_max_lat_fops) -#else -#define trace_create_maxlat_file(tr, d_tracer) do { } while (0) #endif -#ifdef CONFIG_TRACER_MAX_TRACE /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, @@ -1834,14 +1832,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, ring_buffer_record_off(tr->max_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT - if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) - goto out_unlock; + if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { + arch_spin_unlock(&tr->max_lock); + return; + } #endif swap(tr->array_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); - out_unlock: arch_spin_unlock(&tr->max_lock); } @@ -1888,6 +1887,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } + #endif /* CONFIG_TRACER_MAX_TRACE */ static int wait_on_pipe(struct trace_iterator *iter, int full) @@ -5678,6 +5678,7 @@ static const char readme_msg[] = "\t [:size=#entries]\n" "\t [:pause][:continue][:clear]\n" "\t [:name=histname1]\n" + "\t [:nohitcount]\n" "\t [:<handler>.<action>]\n" "\t [if <filter>]\n\n" "\t Note, special fields can be used as well:\n" @@ -5724,7 +5725,9 @@ static const char readme_msg[] = "\t .syscall display a syscall id as a syscall name\n" "\t .log2 display log2 value rather than raw number\n" "\t .buckets=size display values in groups of size rather than raw number\n" - "\t .usecs display a common_timestamp in microseconds\n\n" + "\t .usecs display a common_timestamp in microseconds\n" + "\t .percent display a number of percentage value\n" + "\t .graph display a bar-graph of a value\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" "\t until told to do so. 'continue' can be used to start or\n" @@ -5732,6 +5735,8 @@ static const char readme_msg[] = "\t The 'clear' parameter will clear the contents of a running\n" "\t hist trigger and leave its current paused/active state\n" "\t unchanged.\n\n" + "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" + "\t raw hitcount in the histogram.\n\n" "\t The enable_hist and disable_hist triggers can be used to\n" "\t have one event conditionally start and stop another event's\n" "\t already-attached hist trigger. The syntax is analogous to\n" @@ -6572,7 +6577,7 @@ out: return ret; } -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, @@ -6796,7 +6801,20 @@ waitagain: ret = print_trace_line(iter); if (ret == TRACE_TYPE_PARTIAL_LINE) { - /* don't print partial lines */ + /* + * If one print_trace_line() fills entire trace_seq in one shot, + * trace_seq_to_user() will returns -EBUSY because save_len == 0, + * In this case, we need to consume it, otherwise, loop will peek + * this event next time, resulting in an infinite loop. + */ + if (save_len == 0) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + trace_consume(iter); + break; + } + + /* In other cases, don't print partial lines */ iter->seq.seq.len = save_len; break; } @@ -7587,7 +7605,7 @@ static const struct file_operations tracing_thresh_fops = { .llseek = generic_file_llseek, }; -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -9601,7 +9619,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) create_trace_options_dir(tr); +#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); +#endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); @@ -9855,41 +9875,41 @@ static __init int tracer_init_tracefs(void) fs_initcall(tracer_init_tracefs); -static int trace_panic_handler(struct notifier_block *this, - unsigned long event, void *unused) -{ - if (ftrace_dump_on_oops) - ftrace_dump(ftrace_dump_on_oops); - return NOTIFY_OK; -} +static int trace_die_panic_handler(struct notifier_block *self, + unsigned long ev, void *unused); static struct notifier_block trace_panic_notifier = { - .notifier_call = trace_panic_handler, - .next = NULL, - .priority = 150 /* priority: INT_MAX >= x >= 0 */ + .notifier_call = trace_die_panic_handler, + .priority = INT_MAX - 1, }; -static int trace_die_handler(struct notifier_block *self, - unsigned long val, - void *data) -{ - switch (val) { - case DIE_OOPS: - if (ftrace_dump_on_oops) - ftrace_dump(ftrace_dump_on_oops); - break; - default: - break; - } - return NOTIFY_OK; -} - static struct notifier_block trace_die_notifier = { - .notifier_call = trace_die_handler, - .priority = 200 + .notifier_call = trace_die_panic_handler, + .priority = INT_MAX - 1, }; /* + * The idea is to execute the following die/panic callback early, in order + * to avoid showing irrelevant information in the trace (like other panic + * notifier functions); we are the 2nd to run, after hung_task/rcu_stall + * warnings get disabled (to prevent potential log flooding). + */ +static int trace_die_panic_handler(struct notifier_block *self, + unsigned long ev, void *unused) +{ + if (!ftrace_dump_on_oops) + return NOTIFY_DONE; + + /* The die notifier requires DIE_OOPS to trigger */ + if (self == &trace_die_notifier && ev != DIE_OOPS) + return NOTIFY_DONE; + + ftrace_dump(ftrace_dump_on_oops); + + return NOTIFY_DONE; +} + +/* * printk is set to max of 1024, we really don't need it that big. * Nothing should be printing 1000 characters anyway. */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d42e24507152..e46a49269be2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -308,8 +308,7 @@ struct trace_array { struct array_buffer max_buffer; bool allocated_snapshot; #endif -#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER) +#ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; @@ -615,7 +614,7 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, - va_list ap); + va_list ap) __printf(2, 0); int trace_empty(struct trace_iterator *iter); @@ -688,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -#endif /* CONFIG_TRACER_MAX_TRACE */ -#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ - || defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY) +#ifdef CONFIG_FSNOTIFY #define LATENCY_FS_NOTIFY #endif +#endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef LATENCY_FS_NOTIFY void latency_fsnotify(struct trace_array *tr); @@ -1942,8 +1940,6 @@ static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { } static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -extern struct trace_iterator *tracepoint_print_iter; - /* * Reset the state of the trace_iterator so that it can read consumed data. * Normally, the trace_iterator is used for reading the data when it is not @@ -1956,17 +1952,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) } /* Check the name is good for event/group/fields */ -static inline bool is_good_name(const char *name) +static inline bool __is_good_name(const char *name, bool hash_ok) { |
