Commit 4f9e7fab authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull tracing fixes from Steven Rostedt:

 - Fix ring buffer being permanently disabled due to missed
   record_disabled()

   Changing the trace cpu mask will disable the ring buffers for the
   CPUs no longer in the mask. But it fails to update the snapshot
   buffer. If a snapshot takes place, the accounting for the ring buffer
   being disabled is corrupted and this can lead to the ring buffer
   being permanently disabled.

 - Add test case for snapshot and cpu mask working together

 - Fix memleak by the function graph tracer not getting closed properly.

   The iterator is used to read the ring buffer. When it opens, it calls
   the open function of a tracer, and when it is closed, it calls the
   close function of the tracer. While a trace is being read, it is
   still possible to
   change the tracer.

   If this happens between the function graph tracer and the wakeup
   tracer (which uses function graph tracing), the tracers are not
   closed properly when the iterator sees the switch, and the
   wakeup function did not initialize its private pointer to NULL, which
   is used to know if the function graph tracer was the last tracer. It
   could be fooled in thinking it is, but then on exit it does not call
   the close function of the function graph tracer to clean up its data.

 - Fix synthetic events on big endian machines, by introducing a union
   that does the conversions properly.

 - Fix synthetic events so that they no longer print the number of
   elements of the stacktrace, which should not be shown.

 - Fix synthetic events stacktrace to not print a bogus value at the
   end.

 - Introduce a pipe_cpumask that prevents the trace_pipe files from
   being opened by more than one task (file descriptor).

   There was a race found where if splice is called, the iter->ent could
   become stale and events could be missed. There's no point reading a
   producer/consumer file by more than one task as they will corrupt
   each other anyway. Add a cpumask that keeps track of the per_cpu
   trace_pipe files as well as the global trace_pipe file that prevents
   more than one open of a trace_pipe file that represents the same ring
   buffer. This prevents the race from happening.

 - Fix ftrace samples for arm64 to work with older compilers.

* tag 'trace-v6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  samples: ftrace: Replace bti assembly with hint for older compiler
  tracing: Introduce pipe_cpumask to avoid race on trace_pipes
  tracing: Fix memleak due to race between current_tracer and trace
  tracing/synthetic: Allocate one additional element for size
  tracing/synthetic: Skip first entry for stack traces
  tracing/synthetic: Use union instead of casts
  selftests/ftrace: Add a basic testcase for snapshot
  tracing: Fix cpu buffers unavailable due to 'record_disabled' missed
parents 14ddccc8 e332938e
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
extern __printf(2, 3)
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);

/*
 * Used to find the offset and length of dynamic fields in trace events.
 *
 * A dynamic field is recorded as a single 32-bit word encoded as
 * (len << 16) | offset.  For this struct to overlay that word in
 * memory, the member order must follow the CPU byte order: big endian
 * stores the high half (len) at the lower address, little endian
 * stores the low half (offset) first.
 */
struct trace_dynamic_info {
#ifdef CONFIG_CPU_BIG_ENDIAN
	u16	len;
	u16	offset;
#else
	u16	offset;
	u16	len;
#endif
};

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
+62 −8
Original line number Diff line number Diff line
@@ -4213,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
	 * will point to the same string as current_trace->name.
	 */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		*iter->trace = *tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5277,11 +5284,17 @@ int tracing_set_cpumask(struct trace_array *tr,
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
@@ -6705,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,

#endif

/*
 * Claim a trace_pipe reader slot for @cpu (or all CPUs) in
 * tr->pipe_cpumask.  Only one opener per ring buffer is allowed:
 * returns 0 on success, -EBUSY if the slot is already taken.
 */
static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* The global pipe needs every per-cpu slot to be free */
		if (!cpumask_empty(tr->pipe_cpumask))
			return -EBUSY;
		cpumask_setall(tr->pipe_cpumask);
		return 0;
	}

	if (cpumask_test_cpu(cpu, tr->pipe_cpumask))
		return -EBUSY;

	cpumask_set_cpu(cpu, tr->pipe_cpumask);
	return 0;
}

/*
 * Release the trace_pipe reader slot taken by open_pipe_on_cpu().
 * Warns if the expected bit(s) were not set, which would indicate an
 * unbalanced open/close.
 */
static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		WARN_ON(!cpumask_full(tr->pipe_cpumask));
		cpumask_clear(tr->pipe_cpumask);
		return;
	}

	WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
	cpumask_clear_cpu(cpu, tr->pipe_cpumask);
}

static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	ret = tracing_check_open_get_tr(tr);
@@ -6716,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
		return ret;

	mutex_lock(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		__trace_array_put(tr);
		goto out;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
@@ -6745,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = tracing_get_cpu(inode);
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

@@ -6755,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
	nonseekable_open(inode, filp);

	tr->trace_ref++;
out:

	mutex_unlock(&trace_types_lock);
	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	mutex_unlock(&trace_types_lock);
	return ret;
@@ -6777,7 +6822,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)

	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);

	close_pipe_on_cpu(tr, iter->cpu_file);
	mutex_unlock(&trace_types_lock);

	free_cpumask_var(iter->started);
@@ -9441,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9482,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);
@@ -9584,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);
@@ -10381,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
		goto out_free_pipe_cpumask;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

@@ -10439,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
+10 −0
Original line number Diff line number Diff line
@@ -377,6 +377,8 @@ struct trace_array {
	struct list_head	events;
	struct trace_event_file *trace_marker_file;
	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */
	/* one per_cpu trace_pipe can be opened by only one user */
	cpumask_var_t		pipe_cpumask;
	int			ref;
	int			trace_ref;
#ifdef CONFIG_FUNCTION_TRACER
@@ -1295,6 +1297,14 @@ static inline void trace_branch_disable(void)
/* set ring buffers to default size if not already done so */
int tracing_update_buffers(void);

/*
 * Accessor union for one 64-bit slot of a synthetic event's payload.
 * Reading/writing through the correctly sized member (instead of
 * casting the u64 slot) keeps sub-64-bit values correct on both
 * little and big endian machines.
 */
union trace_synth_field {
	u8				as_u8;
	u16				as_u16;
	u32				as_u32;
	u64				as_u64;
	struct trace_dynamic_info	as_dynamic;	/* (len << 16) | offset pair for dynamic fields */
};

struct ftrace_event_field {
	struct list_head	link;
	const char		*name;
+41 −62
Original line number Diff line number Diff line
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,

struct synth_trace_event {
	struct trace_entry	ent;
	u64			fields[];
	union trace_synth_field	fields[];
};

static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)

static void print_synth_event_num_val(struct trace_seq *s,
				      char *print_fmt, char *name,
				      int size, u64 val, char *space)
				      int size, union trace_synth_field *val, char *space)
{
	switch (size) {
	case 1:
		trace_seq_printf(s, print_fmt, name, (u8)val, space);
		trace_seq_printf(s, print_fmt, name, val->as_u8, space);
		break;

	case 2:
		trace_seq_printf(s, print_fmt, name, (u16)val, space);
		trace_seq_printf(s, print_fmt, name, val->as_u16, space);
		break;

	case 4:
		trace_seq_printf(s, print_fmt, name, (u32)val, space);
		trace_seq_printf(s, print_fmt, name, val->as_u32, space);
		break;

	default:
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
	struct trace_seq *s = &iter->seq;
	struct synth_trace_event *entry;
	struct synth_event *se;
	unsigned int i, n_u64;
	unsigned int i, j, n_u64;
	char print_fmt[32];
	const char *fmt;

@@ -374,43 +374,28 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
		/* parameter values */
		if (se->fields[i]->is_string) {
			if (se->fields[i]->is_dynamic) {
				u32 offset, data_offset;
				char *str_field;

				offset = (u32)entry->fields[n_u64];
				data_offset = offset & 0xffff;

				str_field = (char *)entry + data_offset;
				union trace_synth_field *data = &entry->fields[n_u64];

				trace_seq_printf(s, print_fmt, se->fields[i]->name,
						 STR_VAR_LEN_MAX,
						 str_field,
						 (char *)entry + data->as_dynamic.offset,
						 i == se->n_fields - 1 ? "" : " ");
				n_u64++;
			} else {
				trace_seq_printf(s, print_fmt, se->fields[i]->name,
						 STR_VAR_LEN_MAX,
						 (char *)&entry->fields[n_u64],
						 (char *)&entry->fields[n_u64].as_u64,
						 i == se->n_fields - 1 ? "" : " ");
				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
			}
		} else if (se->fields[i]->is_stack) {
			u32 offset, data_offset, len;
			unsigned long *p, *end;

			offset = (u32)entry->fields[n_u64];
			data_offset = offset & 0xffff;
			len = offset >> 16;

			p = (void *)entry + data_offset;
			end = (void *)p + len - (sizeof(long) - 1);
			union trace_synth_field *data = &entry->fields[n_u64];
			unsigned long *p = (void *)entry + data->as_dynamic.offset;

			trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);

			for (; *p && p < end; p++)
				trace_seq_printf(s, "=> %pS\n", (void *)*p);
			for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
				trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
			n_u64++;

		} else {
			struct trace_print_flags __flags[] = {
			    __def_gfpflag_names, {-1, NULL} };
@@ -419,13 +404,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
			print_synth_event_num_val(s, print_fmt,
						  se->fields[i]->name,
						  se->fields[i]->size,
						  entry->fields[n_u64],
						  &entry->fields[n_u64],
						  space);

			if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
				trace_seq_puts(s, " (");
				trace_print_flags_seq(s, "|",
						      entry->fields[n_u64],
						      entry->fields[n_u64].as_u64,
						      __flags);
				trace_seq_putc(s, ')');
			}
@@ -454,21 +439,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
	int ret;

	if (is_dynamic) {
		u32 data_offset;
		union trace_synth_field *data = &entry->fields[*n_u64];

		data_offset = struct_size(entry, fields, event->n_u64);
		data_offset += data_size;

		len = fetch_store_strlen((unsigned long)str_val);

		data_offset |= len << 16;
		*(u32 *)&entry->fields[*n_u64] = data_offset;
		data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
		data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);

		ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);

		(*n_u64)++;
	} else {
		str_field = (char *)&entry->fields[*n_u64];
		str_field = (char *)&entry->fields[*n_u64].as_u64;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
		if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +472,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
				 unsigned int data_size,
				 unsigned int *n_u64)
{
	union trace_synth_field *data = &entry->fields[*n_u64];
	unsigned int len;
	u32 data_offset;
	void *data_loc;
@@ -504,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
			break;
	}

	/* Include the zero'd element if it fits */
	if (len < HIST_STACKTRACE_DEPTH)
		len++;

	len *= sizeof(long);

	/* Find the dynamic section to copy the stack into. */
@@ -515,8 +492,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
	memcpy(data_loc, stack, len);

	/* Fill in the field that holds the offset/len combo */
	data_offset |= len << 16;
	*(u32 *)&entry->fields[*n_u64] = data_offset;

	data->as_dynamic.offset = data_offset;
	data->as_dynamic.len = len;

	(*n_u64)++;

@@ -550,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
		str_val = (char *)(long)var_ref_vals[val_idx];

		if (event->dynamic_fields[i]->is_stack) {
			len = *((unsigned long *)str_val);
			/* reserve one extra element for size */
			len = *((unsigned long *)str_val) + 1;
			len *= sizeof(unsigned long);
		} else {
			len = fetch_store_strlen((unsigned long)str_val);
@@ -592,19 +571,19 @@ static notrace void trace_event_raw_event_synth(void *__data,

			switch (field->size) {
			case 1:
				*(u8 *)&entry->fields[n_u64] = (u8)val;
				entry->fields[n_u64].as_u8 = (u8)val;
				break;

			case 2:
				*(u16 *)&entry->fields[n_u64] = (u16)val;
				entry->fields[n_u64].as_u16 = (u16)val;
				break;

			case 4:
				*(u32 *)&entry->fields[n_u64] = (u32)val;
				entry->fields[n_u64].as_u32 = (u32)val;
				break;

			default:
				entry->fields[n_u64] = val;
				entry->fields[n_u64].as_u64 = val;
				break;
			}
			n_u64++;
@@ -1791,19 +1770,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)

			switch (field->size) {
			case 1:
				*(u8 *)&state.entry->fields[n_u64] = (u8)val;
				state.entry->fields[n_u64].as_u8 = (u8)val;
				break;

			case 2:
				*(u16 *)&state.entry->fields[n_u64] = (u16)val;
				state.entry->fields[n_u64].as_u16 = (u16)val;
				break;

			case 4:
				*(u32 *)&state.entry->fields[n_u64] = (u32)val;
				state.entry->fields[n_u64].as_u32 = (u32)val;
				break;

			default:
				state.entry->fields[n_u64] = val;
				state.entry->fields[n_u64].as_u64 = val;
				break;
			}
			n_u64++;
@@ -1884,19 +1863,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,

			switch (field->size) {
			case 1:
				*(u8 *)&state.entry->fields[n_u64] = (u8)val;
				state.entry->fields[n_u64].as_u8 = (u8)val;
				break;

			case 2:
				*(u16 *)&state.entry->fields[n_u64] = (u16)val;
				state.entry->fields[n_u64].as_u16 = (u16)val;
				break;

			case 4:
				*(u32 *)&state.entry->fields[n_u64] = (u32)val;
				state.entry->fields[n_u64].as_u32 = (u32)val;
				break;

			default:
				state.entry->fields[n_u64] = val;
				state.entry->fields[n_u64].as_u64 = val;
				break;
			}
			n_u64++;
@@ -2031,19 +2010,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
	} else {
		switch (field->size) {
		case 1:
			*(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
			trace_state->entry->fields[field->offset].as_u8 = (u8)val;
			break;

		case 2:
			*(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
			trace_state->entry->fields[field->offset].as_u16 = (u16)val;
			break;

		case 4:
			*(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
			trace_state->entry->fields[field->offset].as_u32 = (u32)val;
			break;

		default:
			trace_state->entry->fields[field->offset] = val;
			trace_state->entry->fields[field->offset].as_u64 = val;
			break;
		}
	}
+2 −1
Original line number Diff line number Diff line
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
{
	if (is_graph(iter->tr))
		graph_trace_open(iter);

	else
		iter->private = NULL;
}

static void irqsoff_trace_close(struct trace_iterator *iter)
Loading