| field | value | date |
|---|---|---|
| author | Ingo Molnar <mingo@kernel.org> | 2020-03-21 09:23:40 +0100 |
| committer | Ingo Molnar <mingo@kernel.org> | 2020-03-21 09:24:41 +0100 |
| commit | a4654e9bde4ecedb4921e6c8fe2088114bdff1b3 (patch) | |
| tree | 1b9970b520d7bc7176cc9460fe67f210be5ea181 /arch/x86/kernel | |
| parent | 7add7875a8eb4ffe5eddaf8a11e409c9e1b6e3f3 (diff) | |
| parent | e4160b2e4b02377c67f8ecd05786811598f39acd (diff) | |
| download | linux-a4654e9bde4ecedb4921e6c8fe2088114bdff1b3.tar.gz linux-a4654e9bde4ecedb4921e6c8fe2088114bdff1b3.tar.bz2 linux-a4654e9bde4ecedb4921e6c8fe2088114bdff1b3.zip | |
Merge branch 'x86/kdump' into locking/kcsan, to resolve conflicts
Conflicts:
arch/x86/purgatory/Makefile
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
70 files changed, 1320 insertions(+), 1441 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ccb13271895d..70615591a265 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -98,6 +98,7 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace_$(BITS).o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_X86_TSC)	+= trace_clock.o
+obj-$(CONFIG_CRASH_CORE)	+= crash_core_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_KEXEC_FILE)	+= kexec-bzimage64.o
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ca13851f0570..26b7256f590f 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -27,6 +27,17 @@ static char temp_stack[4096];
 #endif
 
 /**
+ * acpi_get_wakeup_address - provide physical address for S3 wakeup
+ *
+ * Returns the physical address where the kernel should be resumed after the
+ * system awakes from S3, e.g. for programming into the firmware waking vector.
+ */
+unsigned long acpi_get_wakeup_address(void)
+{
+	return ((unsigned long)(real_mode_header->wakeup_start));
+}
+
+/**
  * x86_acpi_enter_sleep_state - enter sleep state
  * @state: Sleep state to enter.
  *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index fbb60ca4255c..d06c2079b6c1 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -3,7 +3,7 @@
  * Variables and functions used by the code in sleep.c
  */
 
-#include <asm/realmode.h>
+#include <linux/linkage.h>
 
 extern unsigned long saved_video_mode;
 extern long saved_magic;
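
The sleep.c hunk above exposes acpi_get_wakeup_address() so the ACPI sleep core can program the firmware waking vector with the physical address of the real-mode wakeup trampoline. As a rough illustration of the intended consumer — a sketch only, not code from this merge; arm_s3_wakeup() is a hypothetical helper and the two-argument ACPICA acpi_set_firmware_waking_vector() signature is assumed:

```c
/*
 * Sketch of a consumer: arm the FACS waking vector with the trampoline
 * address before entering S3. arm_s3_wakeup() is a hypothetical name.
 */
static int arm_s3_wakeup(void)
{
	unsigned long wake = acpi_get_wakeup_address();

	if (!wake)
		return -EFAULT;

	/* Program the 32-bit vector; leave the 64-bit vector clear. */
	if (ACPI_FAILURE(acpi_set_firmware_waking_vector(wake, 0)))
		return -EIO;

	return 0;
}
```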
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9ec463fe96f2..15ac0d5f4b40 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,6 +23,7 @@
 #include <asm/nmi.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
 
@@ -936,44 +937,81 @@ static void do_sync_core(void *info)
 	sync_core();
 }
 
-static struct bp_patching_desc {
+void text_poke_sync(void)
+{
+	on_each_cpu(do_sync_core, NULL, 1);
+}
+
+struct text_poke_loc {
+	s32 rel_addr; /* addr := _stext + rel_addr */
+	s32 rel32;
+	u8 opcode;
+	const u8 text[POKE_MAX_OPCODE_SIZE];
+};
+
+struct bp_patching_desc {
 	struct text_poke_loc *vec;
 	int nr_entries;
-} bp_patching;
+	atomic_t refs;
+};
+
+static struct bp_patching_desc *bp_desc;
+
+static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+{
+	struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
+
+	if (!desc || !atomic_inc_not_zero(&desc->refs))
+		return NULL;
+
+	return desc;
+}
+
+static inline void put_desc(struct bp_patching_desc *desc)
+{
+	smp_mb__before_atomic();
+	atomic_dec(&desc->refs);
+}
 
-static int patch_cmp(const void *key, const void *elt)
+static inline void *text_poke_addr(struct text_poke_loc *tp)
+{
+	return _stext + tp->rel_addr;
+}
+
+static int notrace patch_cmp(const void *key, const void *elt)
 {
 	struct text_poke_loc *tp = (struct text_poke_loc *) elt;
 
-	if (key < tp->addr)
+	if (key < text_poke_addr(tp))
 		return -1;
-	if (key > tp->addr)
+	if (key > text_poke_addr(tp))
 		return 1;
 	return 0;
 }
 NOKPROBE_SYMBOL(patch_cmp);
 
-int poke_int3_handler(struct pt_regs *regs)
+int notrace poke_int3_handler(struct pt_regs *regs)
 {
+	struct bp_patching_desc *desc;
 	struct text_poke_loc *tp;
+	int len, ret = 0;
 	void *ip;
 
+	if (user_mode(regs))
+		return 0;
+
 	/*
 	 * Having observed our INT3 instruction, we now must observe
-	 * bp_patching.nr_entries.
+	 * bp_desc:
 	 *
-	 *	nr_entries != 0			INT3
+	 *	bp_desc = desc			INT3
 	 *	WMB				RMB
-	 *	write INT3			if (nr_entries)
-	 *
-	 * Idem for other elements in bp_patching.
+	 *	write INT3			if (desc)
 	 */
 	smp_rmb();
 
-	if (likely(!bp_patching.nr_entries))
-		return 0;
-
-	if (user_mode(regs))
+	desc = try_get_desc(&bp_desc);
+	if (!desc)
 		return 0;
 
 	/*
@@ -984,19 +1022,20 @@ int poke_int3_handler(struct pt_regs *regs)
 	/*
 	 * Skip the binary search if there is a single member in the vector.
 	 */
-	if (unlikely(bp_patching.nr_entries > 1)) {
-		tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+	if (unlikely(desc->nr_entries > 1)) {
+		tp = bsearch(ip, desc->vec, desc->nr_entries,
 			     sizeof(struct text_poke_loc),
 			     patch_cmp);
 		if (!tp)
-			return 0;
+			goto out_put;
 	} else {
-		tp = bp_patching.vec;
-		if (tp->addr != ip)
-			return 0;
+		tp = desc->vec;
+		if (text_poke_addr(tp) != ip)
+			goto out_put;
 	}
 
-	ip += tp->len;
+	len = text_opcode_size(tp->opcode);
+	ip += len;
 
 	switch (tp->opcode) {
 	case INT3_INSN_OPCODE:
@@ -1004,7 +1043,7 @@ int poke_int3_handler(struct pt_regs *regs)
 		 * Someone poked an explicit INT3, they'll want to handle it,
 		 * do not consume.
 		 */
-		return 0;
+		goto out_put;
 
 	case CALL_INSN_OPCODE:
 		int3_emulate_call(regs, (long)ip + tp->rel32);
@@ -1019,10 +1058,18 @@ int poke_int3_handler(struct pt_regs *regs)
 		BUG();
 	}
 
-	return 1;
+	ret = 1;
+
+out_put:
+	put_desc(desc);
+	return ret;
 }
 NOKPROBE_SYMBOL(poke_int3_handler);
 
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
 /**
  * text_poke_bp_batch() -- update instructions on live kernel on SMP
  * @tp:			vector of instructions to patch
@@ -1044,16 +1091,20 @@ NOKPROBE_SYMBOL(poke_int3_handler);
  *		  replacing opcode
  *	- sync cores
  */
-void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
+	struct bp_patching_desc desc = {
+		.vec = tp,
+		.nr_entries = nr_entries,
+		.refs = ATOMIC_INIT(1),
+	};
 	unsigned char int3 = INT3_INSN_OPCODE;
 	unsigned int i;
 	int do_sync;
 
 	lockdep_assert_held(&text_mutex);
 
-	bp_patching.vec = tp;
-	bp_patching.nr_entries = nr_entries;
+	smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
 
 	/*
 	 * Corresponding read barrier in int3 notifier for making sure the
@@ -1065,18 +1116,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	 * First step: add a int3 trap to the address that will be patched.
 	 */
 	for (i = 0; i < nr_entries; i++)
-		text_poke(tp[i].addr, &int3, sizeof(int3));
+		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
 
-	on_each_cpu(do_sync_core, NULL, 1);
+	text_poke_sync();
 
 	/*
 	 * Second step: update all but the first byte of the patched range.
 	 */
 	for (do_sync = 0, i = 0; i < nr_entries; i++) {
-		if (tp[i].len - sizeof(int3) > 0) {
-			text_poke((char *)tp[i].addr + sizeof(int3),
-				  (const char *)tp[i].text + sizeof(int3),
-				  tp[i].len - sizeof(int3));
+		int len = text_opcode_size(tp[i].opcode);
+
+		if (len - INT3_INSN_SIZE > 0) {
+			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+				  (const char *)tp[i].text + INT3_INSN_SIZE,
+				  len - INT3_INSN_SIZE);
 			do_sync++;
 		}
 	}
@@ -1087,7 +1140,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 		 * not necessary and we'd be safe even without it. But
 		 * better safe than sorry (plus there's not only Intel).
 		 */
-		on_each_cpu(do_sync_core, NULL, 1);
+		text_poke_sync();
 	}
 
 	/*
@@ -1098,19 +1151,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 		if (tp[i].text[0] == INT3_INSN_OPCODE)
 			continue;
 
-		text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+		text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
 		do_sync++;
 	}
 
 	if (do_sync)
-		on_each_cpu(do_sync_core, NULL, 1);
+		text_poke_sync();
 
 	/*
-	 * sync_core() implies an smp_mb() and orders this store against
-	 * the writing of the new instruction.
+	 * Remove and synchronize_rcu(), except we have a very primitive
+	 * refcount based completion.
 	 */
-	bp_patching.vec = NULL;
-	bp_patching.nr_entries = 0;
+	WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
+	if (!atomic_dec_and_test(&desc.refs))
+		atomic_cond_read_acquire(&desc.refs, !VAL);
 }
 
 void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
@@ -1118,11 +1172,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 {
 	struct insn insn;
 
-	if (!opcode)
-		opcode = (void *)tp->text;
-	else
-		memcpy((void *)tp->text, opcode, len);
-
+	memcpy((void *)tp->text, opcode, len);
 	if (!emulate)
 		emulate = opcode;
 
@@ -1132,8 +1182,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 	BUG_ON(!insn_complete(&insn));
 	BUG_ON(len != insn.length);
 
-	tp->addr = addr;
-	tp->len = len;
+	tp->rel_addr = addr - (void *)_stext;
 	tp->opcode = insn.opcode.bytes[0];
 
 	switch (tp->opcode) {
@@ -1167,6 +1216,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 	}
 }
 
+/*
+ * We hard rely on the tp_vec being ordered; ensure this is so by flushing
+ * early if needed.
+ */
+static bool tp_order_fail(void *addr)
+{
+	struct text_poke_loc *tp;
+
+	if (!tp_vec_nr)
+		return false;
+
+	if (!addr) /* force */
+		return true;
+
+	tp = &tp_vec[tp_vec_nr - 1];
+	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
+		return true;
+
+	return false;
+}
+
+static void text_poke_flush(void *addr)
+{
+	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
+		text_poke_bp_batch(tp_vec, tp_vec_nr);
+		tp_vec_nr = 0;
+	}
+}
+
+void text_poke_finish(void)
+{
+	text_poke_flush(NULL);
+}
+
+void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
+{
+	struct text_poke_loc *tp;
+
+	if (unlikely(system_state == SYSTEM_BOOTING)) {
+		text_poke_early(addr, opcode, len);
+		return;
+	}
+
+	text_poke_flush(addr);
+
+	tp = &tp_vec[tp_vec_nr++];
+	text_poke_loc_init(tp, addr, opcode, len, emulate);
+}
+
 /**
  * text_poke_bp() -- update instructions on live kernel on SMP
  * @addr:	address to patch
@@ -1178,10 +1276,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
  * dynamically allocated memory. This function should be used when it is
  * not possible to allocate memory.
  */
-void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
 	struct text_poke_loc tp;
 
+	if (unlikely(system_state == SYSTEM_BOOTING)) {
+		text_poke_early(addr, opcode, len);
+		return;
+	}
+
 	text_poke_loc_init(&tp, addr, opcode, len, emulate);
 	text_poke_bp_batch(&tp, 1);
 }
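
The bp_desc lifecycle in the alternative.c hunks above amounts to a hand-rolled RCU, as the "very primitive refcount based completion" comment says: the writer publishes a descriptor holding one reference, int3 handlers pin it with atomic_inc_not_zero(), and after unpublishing the writer waits for the last reader to drop its reference. Below is a standalone sketch of the same publish/retire pattern using C11 atomics — struct desc, live_desc, try_get(), put() and run_with_desc() are hypothetical names, not kernel code:

```c
#include <stdatomic.h>
#include <stddef.h>

struct desc { atomic_int refs; /* ... payload ... */ };

static _Atomic(struct desc *) live_desc;	/* plays the role of bp_desc */

/* Reader side, like try_get_desc(): pin only while refs is non-zero. */
static struct desc *try_get(void)
{
	struct desc *d = atomic_load_explicit(&live_desc, memory_order_acquire);
	int r;

	if (!d)
		return NULL;

	r = atomic_load_explicit(&d->refs, memory_order_relaxed);
	while (r) {
		/* atomic_inc_not_zero() equivalent */
		if (atomic_compare_exchange_weak(&d->refs, &r, r + 1))
			return d;
	}
	return NULL;	/* lost the race against the writer's retire */
}

static void put(struct desc *d)
{
	/* release ordering stands in for smp_mb__before_atomic() */
	atomic_fetch_sub_explicit(&d->refs, 1, memory_order_release);
}

/* Writer side: publish, do the work, unpublish, wait for readers to drain. */
static void run_with_desc(struct desc *d)
{
	atomic_init(&d->refs, 1);			/* writer's own reference */
	atomic_store_explicit(&live_desc, d, memory_order_release);

	/* ... the int3 patching steps would happen here ... */

	atomic_store_explicit(&live_desc, NULL, memory_order_relaxed);
	if (atomic_fetch_sub(&d->refs, 1) != 1)		/* drop writer's ref */
		while (atomic_load_explicit(&d->refs, memory_order_acquire))
			;	/* spin, like atomic_cond_read_acquire(&refs, !VAL) */
}
```

The inc-not-zero on the reader side is what closes the race: a reader that loses against the writer's final decrement sees refs == 0 and backs off, just as poke_int3_handler() returns 0 when try_get_desc() fails.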
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 251c795b4eb3..69aed0ebbdfc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -22,6 +22,7 @@
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_19H_DF_F4	0x1654
 
 /* Protect the PCI config register pairs used for SMN and DF indirect access. */
 static DEFINE_MUTEX(smn_mutex);
@@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 5da106f84e84..fe698f96617c 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -95,7 +95,7 @@ static inline void apbt_set_mapping(void)
 		printk(KERN_WARNING "No timer base from SFI, use default\n");
 		apbt_address = APBT_DEFAULT_BASE;
 	}
-	apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE);
+	apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE);
 	if (!apbt_virt_address) {
 		pr_debug("Failed mapping APBT phy address at %lu\n",\
 			 (unsigned long)apbt_address);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28446fa6bf18..5f973fed3c9f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -830,8 +830,17 @@ bool __init apic_needs_pit(void)
 	if (!tsc_khz || !cpu_khz)
 		return true;
 
-	/* Is there an APIC at all? */
-	if (!boot_cpu_has(X86_FEATURE_APIC))
+	/* Is there an APIC at all or is it disabled? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
+		return true;
+
+	/*
+	 * If interrupt delivery mode is legacy PIC or virtual wire without
+	 * configuration, the local APIC timer wont be set up. Make sure
+	 * that the PIT is initialized.
+	 */
+	if (apic_intr_mode == APIC_PIC ||
+	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 		return true;
 
 	/* Virt guests may lack ARAT, but still have DEADLINE */
@@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void)
 
 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 
-static int __init apic_intr_mode_select(void)
+static int __init __apic_intr_mode_select(void)
 {
 	/* Check kernel option */
 	if (disable_apic) {
@@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/* Select the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_select(void)
+{
+	apic_intr_mode = __apic_intr_mode_select();
+}
+
 /*
  * An initial setup of the virtual wire mode.
 */
@@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void)
 {
 	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
 
-	apic_intr_mode = apic_intr_mode_select();
-
 	switch (apic_intr_mode) {
 	case APIC_PIC:
 		pr_info("APIC: Keep in PIC mode(8259)\n");
@@ -2626,6 +2639,13 @@ static int lapic_suspend(void)
 #endif
 
 	local_irq_save(flags);
+
+	/*
+	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
+	 * entries on some implementations.
+	 */
+	mask_ioapic_entries();
+
 	disable_local_APIC();
 
 	irq_remapping_disable();
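
Splitting __apic_intr_mode_select() out behind a public apic_intr_mode_select() lets the BSP pick the interrupt delivery mode before the legacy timers are initialized, which is what makes the new apic_intr_mode checks in apic_needs_pit() meaningful. A simplified sketch of the presumed boot ordering — late_time_init_sketch() and pit_init() are illustrative names only; apic_intr_mode_select(), apic_needs_pit() and apic_intr_mode_init() are the real functions touched above:

```c
/* Simplified sketch, not the actual arch/x86 timer init code. */
static __init void late_time_init_sketch(void)
{
	/* Decide the delivery mode first, so apic_needs_pit() can use it. */
	apic_intr_mode_select();

	/* Bring up the PIT only when no local APIC timer will take over. */
	if (apic_needs_pit())
		pit_init();

	/* The mode is already chosen; init no longer selects it itself. */
	apic_intr_mode_init();
}
```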
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f7533462474..159bd0cb8548 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@
 
 static struct irq_domain *msi_default_domain;
 
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irqd_cfg(data);
-
 	msg->address_hi = MSI_ADDR_BASE_HI;
 
 	if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 		MSI_DATA_VECTOR(cfg->vector);
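
The msi.c diff is cut off above, but the visible part moves message composition from the irq_data-based entry point to an irq_cfg-based helper, presumably so callers that only hold an irq_cfg can reuse it. The old entry point would then survive as a thin wrapper along these lines (a sketch, not the verified remainder of the patch):

```c
/* Presumed shape of the retained irq_data-based entry point. */
static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
	__irq_msi_compose_msg(irqd_cfg(data), msg);
}
```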
