Loading arch/x86/Kconfig +0 −1 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ config X86 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KVM select HAVE_ARCH_KGDB Loading arch/x86/include/asm/ftrace.h +3 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); extern int modifying_ftrace_code; static inline unsigned long ftrace_call_adjust(unsigned long addr) { Loading @@ -50,6 +51,8 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; int ftrace_int3_handler(struct pt_regs *regs); #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ Loading arch/x86/kernel/ftrace.c +343 −168 Original line number Diff line number Diff line Loading @@ -24,40 +24,21 @@ #include <trace/syscall.h> #include <asm/cacheflush.h> #include <asm/kprobes.h> #include <asm/ftrace.h> #include <asm/nops.h> #include <asm/nmi.h> #ifdef CONFIG_DYNAMIC_FTRACE /* * modifying_code is set to notify NMIs that they need to use * memory barriers when entering or exiting. But we don't want * to burden NMIs with unnecessary memory barriers when code * modification is not being done (which is most of the time). * * A mutex is already held when ftrace_arch_code_modify_prepare * and post_process are called. No locks need to be taken here. * * Stop machine will make sure currently running NMIs are done * and new NMIs will see the updated variable before we need * to worry about NMIs doing memory barriers. */ static int modifying_code __read_mostly; static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); set_all_modules_text_rw(); modifying_code = 1; return 0; } int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; set_all_modules_text_ro(); set_kernel_text_ro(); return 0; Loading Loading @@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } /* * Modifying code must take extra care. On an SMP machine, if * the code being modified is also being executed on another CPU * that CPU will have undefined results and possibly take a GPF. * We use kstop_machine to stop other CPUS from exectuing code. * But this does not stop NMIs from happening. We still need * to protect against that. We separate out the modification of * the code to take care of this. * * Two buffers are added: An IP buffer and a "code" buffer. * * 1) Put the instruction pointer into the IP buffer * and the new code into the "code" buffer. * 2) Wait for any running NMIs to finish and set a flag that says * we are modifying code, it is done in an atomic operation. * 3) Write the code * 4) clear the flag. * 5) Wait for any running NMIs to finish. * * If an NMI is executed, the first thing it does is to call * "ftrace_nmi_enter". This will check if the flag is set to write * and if it is, it will write what is in the IP and "code" buffers. * * The trick is, it does not matter if everyone is writing the same * content to the code location. Also, if a CPU is executing code * it is OK to write to that code location if the contents being written * are the same as what exists. */ #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; static atomic_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { int r; r = snprintf(buf, size, "%u %u", nmi_wait_count, atomic_read(&nmi_update_count)); return r; } static void clear_mod_flag(void) { int old = atomic_read(&nmi_running); for (;;) { int new = old & ~MOD_CODE_WRITE_FLAG; if (old == new) break; old = atomic_cmpxchg(&nmi_running, old, new); } } static void ftrace_mod_code(void) { /* * Yes, more than one CPU process can be writing to mod_code_status. * (and the code itself) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ if (mod_code_status) clear_mod_flag(); } void ftrace_nmi_enter(void) { __this_cpu_write(save_modifying_code, modifying_code); if (!__this_cpu_read(save_modifying_code)) return; if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); ftrace_mod_code(); atomic_inc(&nmi_update_count); } /* Must have previous changes seen before executions */ smp_mb(); } void ftrace_nmi_exit(void) { if (!__this_cpu_read(save_modifying_code)) return; /* Finish all executions before clearing nmi_running */ smp_mb(); atomic_dec(&nmi_running); } static void wait_for_nmi_and_set_mod_flag(void) { if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) return; do { cpu_relax(); } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); nmi_wait_count++; } static void wait_for_nmi(void) { if (!atomic_read(&nmi_running)) return; do { cpu_relax(); } while (atomic_read(&nmi_running)); nmi_wait_count++; } static inline int within(unsigned long addr, unsigned long start, unsigned long end) { Loading @@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) if (within(ip, (unsigned long)_text, (unsigned long)_etext)) ip = (unsigned long)__va(__pa(ip)); mod_code_ip = (void *)ip; mod_code_newcode = new_code; /* The buffers need to be visible before we let NMIs write them */ smp_mb(); wait_for_nmi_and_set_mod_flag(); /* Make sure all running NMIs have finished before we write the code */ smp_mb(); ftrace_mod_code(); /* Make sure the write happens before clearing the bit */ smp_mb(); clear_mod_flag(); wait_for_nmi(); return mod_code_status; return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); } static const unsigned char *ftrace_nop_replace(void) Loading Loading @@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } int modifying_ftrace_code __read_mostly; /* * A breakpoint was added to the code address we are about to * modify, and this is the handle that will just skip over it. * We are either changing a nop into a trace call, or a trace * call to a nop. While the change is taking place, we treat * it just like it was a nop. */ int ftrace_int3_handler(struct pt_regs *regs) { if (WARN_ON_ONCE(!regs)) return 0; if (!ftrace_location(regs->ip - 1)) return 0; regs->ip += MCOUNT_INSN_SIZE - 1; return 1; } static int ftrace_write(unsigned long ip, const char *val, int size) { /* * On x86_64, kernel text mappings are mapped read-only with * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead * of the kernel text mapping to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. */ if (within(ip, (unsigned long)_text, (unsigned long)_etext)) ip = (unsigned long)__va(__pa(ip)); return probe_kernel_write((void *)ip, val, size); } static int add_break(unsigned long ip, const char *old) { unsigned char replaced[MCOUNT_INSN_SIZE]; unsigned char brk = BREAKPOINT_INSTRUCTION; if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; if (ftrace_write(ip, &brk, 1)) return -EPERM; return 0; } static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned const char *old; unsigned long ip = rec->ip; old = ftrace_call_replace(ip, addr); return add_break(rec->ip, old); } static int add_brk_on_nop(struct dyn_ftrace *rec) { unsigned const char *old; old = ftrace_nop_replace(); return add_break(rec->ip, old); } static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_test_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_brk_on_nop(rec); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); } return 0; } /* * On error, we need to remove breakpoints. This needs to * be done caefully. If the address does not currently have a * breakpoint, we know we are done. Otherwise, we look at the * remaining 4 bytes of the instruction. If it matches a nop * we replace the breakpoint with the nop. Otherwise we replace * it with the call instruction. */ static int remove_breakpoint(struct dyn_ftrace *rec) { unsigned char ins[MCOUNT_INSN_SIZE]; unsigned char brk = BREAKPOINT_INSTRUCTION; const unsigned char *nop; unsigned long ftrace_addr; unsigned long ip = rec->ip; /* If we fail the read, just give up */ if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* If this does not have a breakpoint, we are done */ if (ins[0] != brk) return -1; nop = ftrace_nop_replace(); /* * If the last 4 bytes of the instruction do not match * a nop, then we assume that this is a call to ftrace_addr. */ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { /* * For extra paranoidism, we check if the breakpoint is on * a call that would actually jump to the ftrace_addr. * If not, don't touch the breakpoint, we make just create * a disaster. */ ftrace_addr = (unsigned long)FTRACE_ADDR; nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } return probe_kernel_write((void *)ip, &nop[0], 1); } static int add_update_code(unsigned long ip, unsigned const char *new) { /* skip breakpoint */ ip++; new++; if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) return -EPERM; return 0; } static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_call_replace(ip, addr); return add_update_code(ip, new); } static int add_update_nop(struct dyn_ftrace *rec) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_nop_replace(); return add_update_code(ip, new); } static int add_update(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_test_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_update_nop(rec); } return 0; } static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_call_replace(ip, addr); if (ftrace_write(ip, new, 1)) return -EPERM; return 0; } static int finish_update_nop(struct dyn_ftrace *rec) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_nop_replace(); if (ftrace_write(ip, new, 1)) return -EPERM; return 0; } static int finish_update(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_update_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return finish_update_nop(rec); } return 0; } static void do_sync_core(void *data) { sync_core(); } static void run_sync(void) { int enable_irqs = irqs_disabled(); /* We may be called with interrupts disbled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); if (enable_irqs) local_irq_disable(); } static void ftrace_replace_code(int enable) { struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; const char *report = "adding breakpoints"; int count = 0; int ret; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = add_breakpoints(rec, enable); if (ret) goto remove_breakpoints; count++; } run_sync(); report = "updating code"; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = add_update(rec, enable); if (ret) goto remove_breakpoints; } run_sync(); report = "removing breakpoints"; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = finish_update(rec, enable); if (ret) goto remove_breakpoints; } run_sync(); return; remove_breakpoints: ftrace_bug(ret, rec ? rec->ip : 0); printk(KERN_WARNING "Failed on %s (%d):\n", report, count); for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); remove_breakpoint(rec); } } void arch_ftrace_update_code(int command) { modifying_ftrace_code++; if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); if (command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); if (command & FTRACE_START_FUNC_RET) ftrace_enable_ftrace_graph_caller(); else if (command & FTRACE_STOP_FUNC_RET) ftrace_disable_ftrace_graph_caller(); modifying_ftrace_code--; } int __init ftrace_dyn_arch_init(void *data) { /* The return code is retured via data */ Loading arch/x86/kernel/nmi.c +5 −5 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); #define nmi_to_desc(type) (&nmi_desc[type]) static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; Loading Loading @@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void static __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", Loading @@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) outb(reason, NMI_REASON_PORT); } static notrace __kprobes void static __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; Loading @@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, NMI_REASON_PORT); } static notrace __kprobes void static __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; Loading Loading @@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); static notrace __kprobes void default_do_nmi(struct pt_regs *regs) static __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; Loading arch/x86/kernel/traps.c +7 −1 Original line number Diff line number Diff line Loading @@ -50,6 +50,7 @@ #include <asm/processor.h> #include <asm/debugreg.h> #include <linux/atomic.h> #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> #include <asm/i387.h> Loading Loading @@ -303,8 +304,13 @@ do_general_protection(struct pt_regs *regs, long error_code) } /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE /* ftrace must be first, everything else may cause a recursive crash */ if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) return; #endif #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) Loading Loading
arch/x86/Kconfig +0 −1 Original line number Diff line number Diff line Loading @@ -40,7 +40,6 @@ config X86 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KVM select HAVE_ARCH_KGDB Loading
arch/x86/include/asm/ftrace.h +3 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); extern int modifying_ftrace_code; static inline unsigned long ftrace_call_adjust(unsigned long addr) { Loading @@ -50,6 +51,8 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; int ftrace_int3_handler(struct pt_regs *regs); #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ Loading
arch/x86/kernel/ftrace.c +343 −168 Original line number Diff line number Diff line Loading @@ -24,40 +24,21 @@ #include <trace/syscall.h> #include <asm/cacheflush.h> #include <asm/kprobes.h> #include <asm/ftrace.h> #include <asm/nops.h> #include <asm/nmi.h> #ifdef CONFIG_DYNAMIC_FTRACE /* * modifying_code is set to notify NMIs that they need to use * memory barriers when entering or exiting. But we don't want * to burden NMIs with unnecessary memory barriers when code * modification is not being done (which is most of the time). * * A mutex is already held when ftrace_arch_code_modify_prepare * and post_process are called. No locks need to be taken here. * * Stop machine will make sure currently running NMIs are done * and new NMIs will see the updated variable before we need * to worry about NMIs doing memory barriers. */ static int modifying_code __read_mostly; static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); set_all_modules_text_rw(); modifying_code = 1; return 0; } int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; set_all_modules_text_ro(); set_kernel_text_ro(); return 0; Loading Loading @@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } /* * Modifying code must take extra care. On an SMP machine, if * the code being modified is also being executed on another CPU * that CPU will have undefined results and possibly take a GPF. * We use kstop_machine to stop other CPUS from exectuing code. * But this does not stop NMIs from happening. We still need * to protect against that. We separate out the modification of * the code to take care of this. * * Two buffers are added: An IP buffer and a "code" buffer. * * 1) Put the instruction pointer into the IP buffer * and the new code into the "code" buffer. * 2) Wait for any running NMIs to finish and set a flag that says * we are modifying code, it is done in an atomic operation. * 3) Write the code * 4) clear the flag. * 5) Wait for any running NMIs to finish. * * If an NMI is executed, the first thing it does is to call * "ftrace_nmi_enter". This will check if the flag is set to write * and if it is, it will write what is in the IP and "code" buffers. * * The trick is, it does not matter if everyone is writing the same * content to the code location. Also, if a CPU is executing code * it is OK to write to that code location if the contents being written * are the same as what exists. */ #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; static atomic_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { int r; r = snprintf(buf, size, "%u %u", nmi_wait_count, atomic_read(&nmi_update_count)); return r; } static void clear_mod_flag(void) { int old = atomic_read(&nmi_running); for (;;) { int new = old & ~MOD_CODE_WRITE_FLAG; if (old == new) break; old = atomic_cmpxchg(&nmi_running, old, new); } } static void ftrace_mod_code(void) { /* * Yes, more than one CPU process can be writing to mod_code_status. * (and the code itself) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ if (mod_code_status) clear_mod_flag(); } void ftrace_nmi_enter(void) { __this_cpu_write(save_modifying_code, modifying_code); if (!__this_cpu_read(save_modifying_code)) return; if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); ftrace_mod_code(); atomic_inc(&nmi_update_count); } /* Must have previous changes seen before executions */ smp_mb(); } void ftrace_nmi_exit(void) { if (!__this_cpu_read(save_modifying_code)) return; /* Finish all executions before clearing nmi_running */ smp_mb(); atomic_dec(&nmi_running); } static void wait_for_nmi_and_set_mod_flag(void) { if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) return; do { cpu_relax(); } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); nmi_wait_count++; } static void wait_for_nmi(void) { if (!atomic_read(&nmi_running)) return; do { cpu_relax(); } while (atomic_read(&nmi_running)); nmi_wait_count++; } static inline int within(unsigned long addr, unsigned long start, unsigned long end) { Loading @@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) if (within(ip, (unsigned long)_text, (unsigned long)_etext)) ip = (unsigned long)__va(__pa(ip)); mod_code_ip = (void *)ip; mod_code_newcode = new_code; /* The buffers need to be visible before we let NMIs write them */ smp_mb(); wait_for_nmi_and_set_mod_flag(); /* Make sure all running NMIs have finished before we write the code */ smp_mb(); ftrace_mod_code(); /* Make sure the write happens before clearing the bit */ smp_mb(); clear_mod_flag(); wait_for_nmi(); return mod_code_status; return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); } static const unsigned char *ftrace_nop_replace(void) Loading Loading @@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } int modifying_ftrace_code __read_mostly; /* * A breakpoint was added to the code address we are about to * modify, and this is the handle that will just skip over it. * We are either changing a nop into a trace call, or a trace * call to a nop. While the change is taking place, we treat * it just like it was a nop. */ int ftrace_int3_handler(struct pt_regs *regs) { if (WARN_ON_ONCE(!regs)) return 0; if (!ftrace_location(regs->ip - 1)) return 0; regs->ip += MCOUNT_INSN_SIZE - 1; return 1; } static int ftrace_write(unsigned long ip, const char *val, int size) { /* * On x86_64, kernel text mappings are mapped read-only with * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead * of the kernel text mapping to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. */ if (within(ip, (unsigned long)_text, (unsigned long)_etext)) ip = (unsigned long)__va(__pa(ip)); return probe_kernel_write((void *)ip, val, size); } static int add_break(unsigned long ip, const char *old) { unsigned char replaced[MCOUNT_INSN_SIZE]; unsigned char brk = BREAKPOINT_INSTRUCTION; if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; if (ftrace_write(ip, &brk, 1)) return -EPERM; return 0; } static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned const char *old; unsigned long ip = rec->ip; old = ftrace_call_replace(ip, addr); return add_break(rec->ip, old); } static int add_brk_on_nop(struct dyn_ftrace *rec) { unsigned const char *old; old = ftrace_nop_replace(); return add_break(rec->ip, old); } static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_test_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_brk_on_nop(rec); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); } return 0; } /* * On error, we need to remove breakpoints. This needs to * be done caefully. If the address does not currently have a * breakpoint, we know we are done. Otherwise, we look at the * remaining 4 bytes of the instruction. If it matches a nop * we replace the breakpoint with the nop. Otherwise we replace * it with the call instruction. */ static int remove_breakpoint(struct dyn_ftrace *rec) { unsigned char ins[MCOUNT_INSN_SIZE]; unsigned char brk = BREAKPOINT_INSTRUCTION; const unsigned char *nop; unsigned long ftrace_addr; unsigned long ip = rec->ip; /* If we fail the read, just give up */ if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* If this does not have a breakpoint, we are done */ if (ins[0] != brk) return -1; nop = ftrace_nop_replace(); /* * If the last 4 bytes of the instruction do not match * a nop, then we assume that this is a call to ftrace_addr. */ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { /* * For extra paranoidism, we check if the breakpoint is on * a call that would actually jump to the ftrace_addr. * If not, don't touch the breakpoint, we make just create * a disaster. */ ftrace_addr = (unsigned long)FTRACE_ADDR; nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } return probe_kernel_write((void *)ip, &nop[0], 1); } static int add_update_code(unsigned long ip, unsigned const char *new) { /* skip breakpoint */ ip++; new++; if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) return -EPERM; return 0; } static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_call_replace(ip, addr); return add_update_code(ip, new); } static int add_update_nop(struct dyn_ftrace *rec) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_nop_replace(); return add_update_code(ip, new); } static int add_update(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_test_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_update_nop(rec); } return 0; } static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_call_replace(ip, addr); if (ftrace_write(ip, new, 1)) return -EPERM; return 0; } static int finish_update_nop(struct dyn_ftrace *rec) { unsigned long ip = rec->ip; unsigned const char *new; new = ftrace_nop_replace(); if (ftrace_write(ip, new, 1)) return -EPERM; return 0; } static int finish_update(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; int ret; ret = ftrace_update_record(rec, enable); ftrace_addr = (unsigned long)FTRACE_ADDR; switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return finish_update_nop(rec); } return 0; } static void do_sync_core(void *data) { sync_core(); } static void run_sync(void) { int enable_irqs = irqs_disabled(); /* We may be called with interrupts disbled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); if (enable_irqs) local_irq_disable(); } static void ftrace_replace_code(int enable) { struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; const char *report = "adding breakpoints"; int count = 0; int ret; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = add_breakpoints(rec, enable); if (ret) goto remove_breakpoints; count++; } run_sync(); report = "updating code"; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = add_update(rec, enable); if (ret) goto remove_breakpoints; } run_sync(); report = "removing breakpoints"; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); ret = finish_update(rec, enable); if (ret) goto remove_breakpoints; } run_sync(); return; remove_breakpoints: ftrace_bug(ret, rec ? rec->ip : 0); printk(KERN_WARNING "Failed on %s (%d):\n", report, count); for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); remove_breakpoint(rec); } } void arch_ftrace_update_code(int command) { modifying_ftrace_code++; if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); if (command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); if (command & FTRACE_START_FUNC_RET) ftrace_enable_ftrace_graph_caller(); else if (command & FTRACE_STOP_FUNC_RET) ftrace_disable_ftrace_graph_caller(); modifying_ftrace_code--; } int __init ftrace_dyn_arch_init(void *data) { /* The return code is retured via data */ Loading
arch/x86/kernel/nmi.c +5 −5 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); #define nmi_to_desc(type) (&nmi_desc[type]) static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; Loading Loading @@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void static __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", Loading @@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) outb(reason, NMI_REASON_PORT); } static notrace __kprobes void static __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; Loading @@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, NMI_REASON_PORT); } static notrace __kprobes void static __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; Loading Loading @@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); static notrace __kprobes void default_do_nmi(struct pt_regs *regs) static __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; Loading
arch/x86/kernel/traps.c +7 −1 Original line number Diff line number Diff line Loading @@ -50,6 +50,7 @@ #include <asm/processor.h> #include <asm/debugreg.h> #include <linux/atomic.h> #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> #include <asm/i387.h> Loading Loading @@ -303,8 +304,13 @@ do_general_protection(struct pt_regs *regs, long error_code) } /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE /* ftrace must be first, everything else may cause a recursive crash */ if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) return; #endif #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) Loading