summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-12-25 10:43:08 +0100
committerIngo Molnar <mingo@kernel.org>2019-12-25 10:43:08 +0100
commit46f5cfc13d54962097cb0cc5f97593e2a98a6aed (patch)
tree1ec5da58e3d085a7e358a4b7adf3b6e6b7ce1753 /arch/x86
parent46cf053efec6a3a5f343fead837777efe8252a46 (diff)
parent2040cf9f59037aa8aec749363e69ead165b67b43 (diff)
downloadlinux-46f5cfc13d54962097cb0cc5f97593e2a98a6aed.tar.gz
linux-46f5cfc13d54962097cb0cc5f97593e2a98a6aed.tar.bz2
linux-46f5cfc13d54962097cb0cc5f97593e2a98a6aed.zip
Merge branch 'core/kprobes' into perf/core, to pick up a completed branch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/ftrace.h2
-rw-r--r--arch/x86/include/asm/kprobes.h14
-rw-r--r--arch/x86/include/asm/set_memory.h2
-rw-r--r--arch/x86/include/asm/text-patching.h86
-rw-r--r--arch/x86/kernel/alternative.c130
-rw-r--r--arch/x86/kernel/ftrace.c688
-rw-r--r--arch/x86/kernel/jump_label.c116
-rw-r--r--arch/x86/kernel/kprobes/core.c20
-rw-r--r--arch/x86/kernel/kprobes/opt.c67
-rw-r--r--arch/x86/kernel/traps.c9
-rw-r--r--arch/x86/mm/init_32.c28
-rw-r--r--arch/x86/mm/init_64.c36
12 files changed, 385 insertions, 813 deletions
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c2a7458f912c..85be2f506272 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -47,8 +47,6 @@ struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
-int ftrace_int3_handler(struct pt_regs *regs);
-
#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5dc909d9ad81..95b1f053bd96 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -11,12 +11,11 @@
#include <asm-generic/kprobes.h>
-#define BREAKPOINT_INSTRUCTION 0xcc
-
#ifdef CONFIG_KPROBES
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#include <asm/text-patching.h>
#include <asm/insn.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
@@ -25,10 +24,7 @@ struct pt_regs;
struct kprobe;
typedef u8 kprobe_opcode_t;
-#define RELATIVEJUMP_OPCODE 0xe9
-#define RELATIVEJUMP_SIZE 5
-#define RELATIVECALL_OPCODE 0xe8
-#define RELATIVE_ADDR_SIZE 4
+
#define MAX_STACK_SIZE 64
#define CUR_STACK_SIZE(ADDR) \
(current_top_of_stack() - (unsigned long)(ADDR))
@@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[];
extern __visible kprobe_opcode_t optprobe_template_val[];
extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
-#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE)
#define MAX_OPTINSN_SIZE \
(((unsigned long)optprobe_template_end - \
(unsigned long)optprobe_template_entry) + \
- MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+ MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE)
extern const int kretprobe_blacklist_size;
@@ -73,7 +69,7 @@ struct arch_specific_insn {
struct arch_optimized_insn {
/* copy of the original instructions */
- kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ kprobe_opcode_t copied_insn[DISP32_SIZE];
/* detour code buffer */
kprobe_opcode_t *insn;
/* the size of instructions copied to detour code buffer */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 2ee8e469dcf5..64c3dce374e5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
#ifdef CONFIG_X86_64
static inline int set_mce_nospec(unsigned long pfn)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 23c626a742e8..67315fa3956a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -25,14 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
*/
#define POKE_MAX_OPCODE_SIZE 5
-struct text_poke_loc {
- void *addr;
- int len;
- s32 rel32;
- u8 opcode;
- const u8 text[POKE_MAX_OPCODE_SIZE];
-};
-
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@@ -50,21 +42,13 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
* an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
-extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
-extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
- const void *opcode, size_t len, const void *emulate);
-extern int after_bootmem;
-extern __ro_after_init struct mm_struct *poking_mm;
-extern __ro_after_init unsigned long poking_addr;
-#ifndef CONFIG_UML_X86
-static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
-{
- regs->ip = ip;
-}
+extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void text_poke_finish(void);
#define INT3_INSN_SIZE 1
#define INT3_INSN_OPCODE 0xCC
@@ -78,6 +62,67 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define JMP8_INSN_SIZE 2
#define JMP8_INSN_OPCODE 0xEB
+#define DISP32_SIZE 4
+
+static inline int text_opcode_size(u8 opcode)
+{
+ int size = 0;
+
+#define __CASE(insn) \
+ case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break
+
+ switch(opcode) {
+ __CASE(INT3);
+ __CASE(CALL);
+ __CASE(JMP32);
+ __CASE(JMP8);
+ }
+
+#undef __CASE
+
+ return size;
+}
+
+union text_poke_insn {
+ u8 text[POKE_MAX_OPCODE_SIZE];
+ struct {
+ u8 opcode;
+ s32 disp;
+ } __attribute__((packed));
+};
+
+static __always_inline
+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
+{
+ static union text_poke_insn insn; /* per instance */
+ int size = text_opcode_size(opcode);
+
+ insn.opcode = opcode;
+
+ if (size > 1) {
+ insn.disp = (long)dest - (long)(addr + size);
+ if (size == 2) {
+ /*
+ * Ensure that for JMP9 the displacement
+ * actually fits the signed byte.
+ */
+ BUG_ON((insn.disp >> 31) != (insn.disp >> 7));
+ }
+ }
+
+ return &insn.text;
+}
+
+extern int after_bootmem;
+extern __ro_after_init struct mm_struct *poking_mm;
+extern __ro_after_init unsigned long poking_addr;
+
+#ifndef CONFIG_UML_X86
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+ regs->ip = ip;
+}
+
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -85,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
* stack where the break point happened, and the saving of
* pt_regs. We can extend the original stack because of
* this gap. See the idtentry macro's create_gap option.
+ *
+ * Similarly entry_32.S will have a gap on the stack for (any) hardware
+ * exception and pt_regs; see FIXUP_FRAME.
*/
regs->sp -= sizeof(unsigned long);
*(unsigned long *)regs->sp = val;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9ec463fe96f2..30e86730655c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -936,27 +936,45 @@ static void do_sync_core(void *info)
sync_core();
}
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+struct text_poke_loc {
+ s32 rel_addr; /* addr := _stext + rel_addr */
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
+};
+
static struct bp_patching_desc {
struct text_poke_loc *vec;
int nr_entries;
} bp_patching;
-static int patch_cmp(const void *key, const void *elt)
+static inline void *text_poke_addr(struct text_poke_loc *tp)
+{
+ return _stext + tp->rel_addr;
+}
+
+static int notrace patch_cmp(const void *key, const void *elt)
{
struct text_poke_loc *tp = (struct text_poke_loc *) elt;
- if (key < tp->addr)
+ if (key < text_poke_addr(tp))
return -1;
- if (key > tp->addr)
+ if (key > text_poke_addr(tp))
return 1;
return 0;
}
NOKPROBE_SYMBOL(patch_cmp);
-int poke_int3_handler(struct pt_regs *regs)
+int notrace poke_int3_handler(struct pt_regs *regs)
{
struct text_poke_loc *tp;
void *ip;
+ int len;
/*
* Having observed our INT3 instruction, we now must observe
@@ -992,11 +1010,12 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
} else {
tp = bp_patching.vec;
- if (tp->addr != ip)
+ if (text_poke_addr(tp) != ip)
return 0;
}
- ip += tp->len;
+ len = text_opcode_size(tp->opcode);
+ ip += len;
switch (tp->opcode) {
case INT3_INSN_OPCODE:
@@ -1023,6 +1042,10 @@ int poke_int3_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(poke_int3_handler);
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
/**
* text_poke_bp_batch() -- update instructions on live kernel on SMP
* @tp: vector of instructions to patch
@@ -1044,7 +1067,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* replacing opcode
* - sync cores
*/
-void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
@@ -1065,18 +1088,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* First step: add a int3 trap to the address that will be patched.
*/
for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, &int3, sizeof(int3));
+ text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
/*
* Second step: update all but the first byte of the patched range.
*/
for (do_sync = 0, i = 0; i < nr_entries; i++) {
- if (tp[i].len - sizeof(int3) > 0) {
- text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].text + sizeof(int3),
- tp[i].len - sizeof(int3));
+ int len = text_opcode_size(tp[i].opcode);
+
+ if (len - INT3_INSN_SIZE > 0) {
+ text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+ (const char *)tp[i].text + INT3_INSN_SIZE,
+ len - INT3_INSN_SIZE);
do_sync++;
}
}
@@ -1087,7 +1112,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* not necessary and we'd be safe even without it. But
* better safe than sorry (plus there's not only Intel).
*/
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
}
/*
@@ -1098,19 +1123,25 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
if (tp[i].text[0] == INT3_INSN_OPCODE)
continue;
- text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
do_sync++;
}
if (do_sync)
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
*/
- bp_patching.vec = NULL;
bp_patching.nr_entries = 0;
+ /*
+ * This sync_core () call ensures that all INT3 handlers in progress
+ * have finished. This allows poke_int3_handler() after this to
+ * avoid touching bp_paching.vec by checking nr_entries == 0.
+ */
+ text_poke_sync();
+ bp_patching.vec = NULL;
}
void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
@@ -1118,11 +1149,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
{
struct insn insn;
- if (!opcode)
- opcode = (void *)tp->text;
- else
- memcpy((void *)tp->text, opcode, len);
-
+ memcpy((void *)tp->text, opcode, len);
if (!emulate)
emulate = opcode;
@@ -1132,8 +1159,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
BUG_ON(!insn_complete(&insn));
BUG_ON(len != insn.length);
- tp->addr = addr;
- tp->len = len;
+ tp->rel_addr = addr - (void *)_stext;
tp->opcode = insn.opcode.bytes[0];
switch (tp->opcode) {
@@ -1167,6 +1193,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
}
}
+/*
+ * We hard rely on the tp_vec being ordered; ensure this is so by flushing
+ * early if needed.
+ */
+static bool tp_order_fail(void *addr)
+{
+ struct text_poke_loc *tp;
+
+ if (!tp_vec_nr)
+ return false;
+
+ if (!addr) /* force */
+ return true;
+
+ tp = &tp_vec[tp_vec_nr - 1];
+ if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
+ return true;
+
+ return false;
+}
+
+static void text_poke_flush(void *addr)
+{
+ if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
+ text_poke_bp_batch(tp_vec, tp_vec_nr);
+ tp_vec_nr = 0;
+ }
+}
+
+void text_poke_finish(void)
+{
+ text_poke_flush(NULL);
+}
+
+void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
+{
+ struct text_poke_loc *tp;
+
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
+ text_poke_flush(addr);
+
+ tp = &tp_vec[tp_vec_nr++];
+ text_poke_loc_init(tp, addr, opcode, len, emulate);
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1178,10 +1253,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
struct text_poke_loc tp;
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 024c3053dbba..108ee96f8b66 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -34,6 +34,8 @@
#ifdef CONFIG_DYNAMIC_FTRACE
+static int ftrace_poke_late = 0;
+
int ftrace_arch_code_modify_prepare(void)
__acquires(&text_mutex)
{
@@ -43,84 +45,37 @@ int ftrace_arch_code_modify_prepare(void)
* ftrace has it set to "read/write".
*/
mutex_lock(&text_mutex);
- set_kernel_text_rw();
- set_all_modules_text_rw();
+ ftrace_poke_late = 1;
return 0;
}
int ftrace_arch_code_modify_post_process(void)
__releases(&text_mutex)
{
- set_all_modules_text_ro();
- set_kernel_text_ro();
+ /*
+ * ftrace_make_{call,nop}() may be called during
+ * module load, and we need to finish the text_poke_queue()
+ * that they do, here.
+ */
+ text_poke_finish();
+ ftrace_poke_late = 0;
mutex_unlock(&text_mutex);
return 0;
}
-union ftrace_code_union {
- char code[MCOUNT_INSN_SIZE];
- struct {
- unsigned char op;
- int offset;
- } __attribute__((packed));
-};
-
-static int ftrace_calc_offset(long ip, long addr)
-{
- return (int)(addr - ip);
-}
-
-static unsigned char *
-ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
+static const char *ftrace_nop_replace(void)
{
- static union ftrace_code_union calc;
-
- calc.op = op;
- calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
-
- return calc.code;
-}
-
-static unsigned char *
-ftrace_call_replace(unsigned long ip, unsigned long addr)
-{
- return ftrace_text_replace(0xe8, ip, addr);
-}
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
-{
- return addr >= start && addr < end;
+ return ideal_nops[NOP_ATOMIC5];
}
-static unsigned long text_ip_addr(unsigned long ip)
+static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
- /*
- * On x86_64, kernel text mappings are mapped read-only, so we use
- * the kernel identity mapping instead of the kernel text mapping
- * to modify the kernel text.
- *
- * For 32bit kernels, these mappings are same and we can use
- * kernel identity mapping to modify code.
- */
- if (within(ip, (unsigned long)_text, (unsigned long)_etext))
- ip = (unsigned long)__va(__pa_symbol(ip));
-
- return ip;
+ return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}
-static const unsigned char *ftrace_nop_replace(void)
+static int ftrace_verify_code(unsigned long ip, const char *old_code)
{
- return ideal_nops[NOP_ATOMIC5];
-}
-
-static int
-ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
- unsigned const char *new_code)
-{
- unsigned char replaced[MCOUNT_INSN_SIZE];
-
- ftrace_expected = old_code;
+ char cur_code[MCOUNT_INSN_SIZE];
/*
* Note:
@@ -129,31 +84,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
* Carefully read and modify the code with probe_kernel_*(), and make
* sure what we read is what we expected it to be before modifying it.
*/
-
/* read the text we want to modify */
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
+ WARN_ON(1);
return -EFAULT;
+ }
/* Make sure it is what we expect it to be */
- if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
+ WARN_ON(1);
return -EINVAL;
+ }
- ip = text_ip_addr(ip);
-
- /* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
- return -EPERM;
+ return 0;
+}
- sync_core();
+/*
+ * Marked __ref because it calls text_poke_early() which is .init.text. That is
+ * ok because that call will happen early, during boot, when .init sections are
+ * still present.
+ */
+static int __ref
+ftrace_modify_code_direct(unsigned long ip, const char *old_code,
+ const char *new_code)
+{
+ int ret = ftrace_verify_code(ip, old_code);
+ if (ret)
+ return ret;
+ /* replace the text with the new text */
+ if (ftrace_poke_late)
+ text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+ else
+ text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
}
-int ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned const char *new, *old;
unsigned long ip = rec->ip;
+ const char *new, *old;
old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace();
@@ -167,19 +137,20 @@ int ftrace_make_nop(struct module *mod,
* just modify the code directly.
*/
if (addr == MCOUNT_ADDR)
- return ftrace_modify_code_direct(rec->ip, old, new);
+ return ftrace_modify_code_direct(ip, old, new);
- ftrace_expected = NULL;
-
- /* Normal cases use add_brk_on_nop */
+ /*
+ * x86 overrides ftrace_replace_code -- this function will never be used
+ * in this case.
+ */
WARN_ONCE(1, "invalid use of ftrace_make_nop");
return -EINVAL;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned const char *new, *old;
unsigned long ip = rec->ip;
+ const char *new, *old;
old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
@@ -189,43 +160,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
}
/*
- * The modifying_ftrace_code is used to tell the breakpoint
- * handler to call ftrace_int3_handler(). If it fails to
- * call this handler for a breakpoint added by ftrace, then
- * the kernel may crash.
- *
- * As atomic_writes on x86 do not need a barrier, we do not
- * need to add smp_mb()s for this to work. It is also considered
- * that we can not read the modifying_ftrace_code before
- * executing the breakpoint. That would be quite remarkable if
- * it could do that. Here's the flow that is required:
- *
- * CPU-0 CPU-1
- *
- * atomic_inc(mfc);
- * write int3s
- * <trap-int3> // implicit (r)mb
- * if (atomic_read(mfc))
- * call ftrace_int3_handler()
- *
- * Then when we are finished:
- *
- * atomic_dec(mfc);
- *
- * If we hit a breakpoint that was not set by ftrace, it does not
- * matter if ftrace_int3_handler() is called or not. It will
- * simply be ignored. But it is crucial that a ftrace nop/caller
- * breakpoint is handled. No other user should ever place a
- * breakpoint on an ftrace nop/caller location. It must only
- * be done by this code.
- */
-atomic_t modifying_ftrace_code __read_mostly;
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
- unsigned const char *new_code);
-
-/*
* Should never be called:
* As it is only called by __ftrace_replace_code() which is called by
* ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
@@ -237,452 +171,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
WARN_ON(1);
- ftrace_expected = NULL;
return -EINVAL;
}
-static unsigned long ftrace_update_func;
-static unsigned long ftrace_update_func_call;
-
-static int update_ftrace_func(unsigned long ip, void *new)
-{
- unsigned char old[MCOUNT_INSN_SIZE];
- int ret;
-
- memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
-
- ftrace_update_func = ip;
- /* Make sure the breakpoints see the ftrace_update_func update */
- smp_wmb();
-
- /* See comment above by declaration of modifying_ftrace_code */
- atomic_inc(&modifying_ftrace_code);
-
- ret = ftrace_modify_code(ip, old, new);
-
- atomic_dec(&modifying_ftrace_code);
-
- return ret;
-}
-
int ftrace_update_ftrace_func(ftrace_func_t func)
{
- unsigned long ip = (unsigned long)(&ftrace_call);
- unsigned char *new;
- int ret;
-
- ftrace_update_func_call = (unsigned long)func;
-
- new = ftrace_call_replace(ip, (unsigned long)func);
- ret = update_ftrace_func(ip, new);
-
- /* Also update the regs callback function */
- if (!ret) {
- ip = (unsigned long)(&ftrace_regs_call);
- new = ftrace_call_replace(ip, (unsigned long)func);
- ret = update_ftrace_func(ip, new);
- }
-
- return ret;
-}
-
-static nokprobe_inline int is_ftrace_caller(unsigned long ip)
-{
- if (ip == ftrace_update_func)
- return 1;
-
- return 0;
-}
-
-/*
- * A breakpoint was added to the code address we are about to
- * modify, and this is the handle that will just skip over it.
- * We are either changing a nop into a trace call, or a trace
- * call to a nop. While the change is taking place, we treat
- * it just like it was a nop.
- */
-int ftrace_int3_handler(struct pt_regs *regs)
-{
unsigned long ip;
+ const char *new;
- if (WARN_ON_ONCE(!regs))
- return 0;
-
- ip = regs->ip - INT3_INSN_SIZE;
-
- if (ftrace_location(ip)) {
- int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
- return 1;
- } else if (is_ftrace_caller(ip)) {
- if (!ftrace_update_func_call) {
- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
- return 1;
- }
- int3_emulate_call(regs, ftrace_update_func_call);
- return 1;
- }
-
- return 0;
-}
-NOKPROBE_SYMBOL(ftrace_int3_handler);
-
-static int ftrace_write(unsigned long ip, const char *val, int size)
-{
- ip = text_ip_addr(ip);
-
- if (probe_kernel_write((void *)ip, val, size))
- return -EPERM;
-
- return 0;
-}
-
-static int add_break(unsigned long ip, const char *old)
-{
- unsigned char replaced[MCOUNT_INSN_SIZE];
- unsigned char brk = BREAKPOINT_INSTRUCTION;
-
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- ftrace_expected = old;
-
- /* Make sure it is what we expect it to be */
- if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
- return -EINVAL;
-
- return ftrace_write(ip, &brk, 1);
-}
-
-static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned const char *old;
- unsigned long ip = rec->ip;
-
- old = ftrace_call_replace(ip, addr);
-
- return add_break(rec->ip, old);
-}
-
-
-static int add_brk_on_nop(struct dyn_ftrace *rec)
-{
- unsigned const char *old;
-
- old = ftrace_nop_replace();
-
- return add_break(rec->ip, old);
-}
-
-static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
-{
- unsigned long ftrace_addr;
- int ret;
-
- ftrace_addr = ftrace_get_addr_curr(rec);
-
- ret = ftrace_test_record(rec, enable);
-
- switch (ret) {
- case FTRACE_UPDATE_IGNORE:
- return 0;
-
- case FTRACE_UPDATE_MAKE_CALL:
- /* converting nop to call */
- return add_brk_on_nop(rec);
-
- case FTRACE_UPDATE_MODIFY_CALL:
- case FTRACE_UPDATE_MAKE_NOP:
- /* converting a call to a nop */
- return add_brk_on_call(rec, ftrace_addr);
- }
- return 0;
-}
-
-/*
- * On error, we need to remove breakpoints. This needs to
- * be done caefully. If the address does not currently have a
- * breakpoint, we know we are done. Otherwise, we look at the
- * remaining 4 bytes of the instruction. If it matches a nop
- * we replace the breakpoint with the nop. Otherwise we replace
- * it with the call instruction.
- */
-static int remove_breakpoint(struct dyn_ftrace *rec)
-{
- unsigned char ins[MCOUNT_INSN_SIZE];
- unsigned char brk = BREAKPOINT_INSTRUCTION;
- const unsigned char *nop;
- unsigned long ftrace_addr;
- unsigned long ip = rec->ip;
-
- /* If we fail the read, just give up */
- if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* If this does not have a breakpoint, we are done */
- if (ins[0] != brk)
- return 0;
-
- nop = ftrace_nop_replace();
-
- /*
- * If the last 4 bytes of the instruction do not match
- * a nop, then we assume that this is a call to ftrace_addr.
- */
- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
- /*
- * For extra paranoidism, we check if the breakpoint is on
- * a call that would actually jump to the ftrace_addr.
- * If not, don't touch the breakpoint, we make just create
- * a disaster.
- */
- ftrace_addr = ftrace_get_addr_new(rec);
- nop = ftrace_call_replace(ip, ftrace_addr);
-
- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
- goto update;
-
- /* Check both ftrace_addr and ftrace_old_addr */
- ftrace_addr = ftrace_get_addr_curr(rec);
- nop = ftrace_call_replace(ip, ftrace_addr);
-
- ftrace_expected = nop;
-
- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
- return -EINVAL;
- }
-
- update:
- return ftrace_write(ip, nop, 1);
-}
-
-static int add_update_code(unsigned long ip, unsigned const char *new)
-{
- /* skip breakpoint */
- ip++;
- new++;
- return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
-}
-
-static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned const char *new;
-
- new = ftrace_call_replace(ip, addr);
- return add_update_code(ip, new);
-}
-
-static int add_update_nop(struct dyn_ftrace *rec)
-{
- unsigned long ip = rec->ip;
- unsigned const char *new;
-
- new = ftrace_nop_replace();
- return add_update_code(ip, new);
-}
-
-static int add_update(struct dyn_ftrace *rec, bool enable)
-{
- unsigned long ftrace_addr;
- int ret;
-
- ret = ftrace_test_record(rec, enable);
-
- ftrace_addr = ftrace_get_addr_new(rec);
-
- switch (ret) {
- case FTRACE_UPDATE_IGNORE:
- return 0;
-
- case FTRACE_UPDATE_MODIFY_CALL:
- case FTRACE_UPDATE_MAKE_CALL:
- /* converting nop to call */
- return add_update_call(rec, ftrace_addr);
-
- case FTRACE_UPDATE_MAKE_NOP:
- /* converting a call to a nop */
- return add_update_nop(rec);
- }
-
- return 0;
-}
-
-static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned const char *new;
-
- new = ftrace_call_replace(ip, addr);
-
- return ftrace_write(ip, new, 1);
-}
-
-static int finish_update_nop(struct dyn_ftrace *rec)
-{
- unsigned long ip = rec->ip;
- unsigned const char *new;
-
- new = ftrace_nop_replace();
-
- return ftrace_write(ip, new, 1);
-}
-
-static int finish_update(struct dyn_ftrace *rec, bool enable)
-{
- unsigned long ftrace_addr;
- int ret;
-
- ret = ftrace_update_record(rec, enable);
-
- ftrace_addr = ftrace_get_addr_new(rec);
-
- switch (ret) {
- case FTRACE_UPDATE_IGNORE:
- return 0;
-
- case FTRACE_UPDATE_MODIFY_CALL:
- case FTRACE_UPDATE_MAKE_CALL:
- /* converting nop to call */
- return finish_update_call(rec, ftrace_addr);
+ ip = (unsigned long)(&ftrace_call);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
- case FTRACE_UPDATE_MAKE_NOP:
- /* converting a call to a nop */
- return finish_update_nop(rec);
- }
+ ip = (unsigned long)(&ftrace_regs_call);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
return 0;
}
-static void do_sync_core(void *data)
-{
- sync_core();
-}
-
-static void run_sync(void)
-{
- int enable_irqs;
-
- /* No need to sync if there's only one CPU */
- if (num_online_cpus() == 1)
- return;
-
- enable_irqs = irqs_disabled();
-
- /* We may be called with interrupts disabled (on bootup). */
- if (enable_irqs)
- local_irq_enable();
- on_each_cpu(do_sync_core, NULL, 1);
- if (enable_irqs)
- local_irq_disable();
-}
-
void ftrace_replace_code(int enable)
{
struct ftrace_rec_iter *iter;
struct dyn_ftrace *rec;
- const char *report = "adding breakpoints";
- int count = 0;