Commit 80a76994 authored by Steven Rostedt (Google)'s avatar Steven Rostedt (Google)
Browse files

tracing: Add "fields" option to show raw trace event fields

The hex, raw and bin formats come from the old PREEMPT_RT patch set
latency tracer. That actually gave real alternatives to reading the ascii
buffer. But they have started to bit rot and they do not give a good
representation of the tracing data.

Add "fields" option that will read the trace event fields and parse the
data from how the fields are defined:

With "fields" = 0 (default)

 echo 1 > events/sched/sched_switch/enable
 cat trace
         <idle>-0       [003] d..2.   540.078653: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/3:1 next_pid=83 next_prio=120
     kworker/3:1-83      [003] d..2.   540.078860: sched_switch: prev_comm=kworker/3:1 prev_pid=83 prev_prio=120 prev_state=I ==> next_comm=swapper/3 next_pid=0 next_prio=120
          <idle>-0       [003] d..2.   540.206423: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=sshd next_pid=807 next_prio=120
            sshd-807     [003] d..2.   540.206531: sched_switch: prev_comm=sshd prev_pid=807 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120
          <idle>-0       [001] d..2.   540.206597: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120
   kworker/u16:4-58      [001] d..2.   540.206617: sched_switch: prev_comm=kworker/u16:4 prev_pid=58 prev_prio=120 prev_state=I ==> next_comm=bash next_pid=830 next_prio=120
            bash-830     [001] d..2.   540.206678: sched_switch: prev_comm=bash prev_pid=830 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120
   kworker/u16:4-58      [001] d..2.   540.206696: sched_switch: prev_comm=kworker/u16:4 prev_pid=58 prev_prio=120 prev_state=I ==> next_comm=bash next_pid=830 next_prio=120
            bash-830     [001] d..2.   540.206713: sched_switch: prev_comm=bash prev_pid=830 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:4 next_pid=58 next_prio=120

 echo 1 > options/fields
           <...>-998     [002] d..2.   538.643732: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/2 prev_state=0x20 (32) prev_prio=0x78 (120) prev_pid=0x3e6 (998) prev_comm=trace-cmd
          <idle>-0       [001] d..2.   538.643806: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x0 (0) prev_comm=swapper/1
            bash-830     [001] d..2.   538.644106: sched_switch: next_prio=0x78 (120) next_pid=0x3a (58) next_comm=kworker/u16:4 prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
   kworker/u16:4-58      [001] d..2.   538.644130: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x80 (128) prev_prio=0x78 (120) prev_pid=0x3a (58) prev_comm=kworker/u16:4
            bash-830     [001] d..2.   538.644180: sched_switch: next_prio=0x78 (120) next_pid=0x3a (58) next_comm=kworker/u16:4 prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
   kworker/u16:4-58      [001] d..2.   538.644185: sched_switch: next_prio=0x78 (120) next_pid=0x33e (830) next_comm=bash prev_state=0x80 (128) prev_prio=0x78 (120) prev_pid=0x3a (58) prev_comm=kworker/u16:4
            bash-830     [001] d..2.   538.644204: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/1 prev_state=0x1 (1) prev_prio=0x78 (120) prev_pid=0x33e (830) prev_comm=bash
          <idle>-0       [003] d..2.   538.644211: sched_switch: next_prio=0x78 (120) next_pid=0x327 (807) next_comm=sshd prev_state=0x0 (0) prev_prio=0x78 (120) prev_pid=0x0 (0) prev_comm=swapper/3
            sshd-807     [003] d..2.   538.644340: sched_switch: next_prio=0x78 (120) next_pid=0x0 (0) next_comm=swapper/3 prev_state=0x1 (1) prev_prio=0x78 (120) prev_pid=0x327 (807) prev_comm=sshd

It traces the data safely without using the trace print formatting.

Link: https://lore.kernel.org/linux-trace-kernel/20230328145156.497651be@gandalf.local.home



Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
parent c2f92e8b
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -1027,6 +1027,7 @@ To see what is available, simply cat the file::
	nohex
	nobin
	noblock
	nofields
	trace_printk
	annotate
	nouserstacktrace
@@ -1110,6 +1111,11 @@ Here are the available options:
  block
	When set, reading trace_pipe will not block when polled.

  fields
	Print the fields as described by their types. This is a better
	option than using hex, bin or raw, as it gives a better parsing
	of the content of the event.

  trace_printk
	Can disable trace_printk() from writing into the buffer.

+5 −2
Original line number Diff line number Diff line
@@ -3726,7 +3726,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
#define STATIC_FMT_BUF_SIZE	128
static char static_fmt_buf[STATIC_FMT_BUF_SIZE];

static char *trace_iter_expand_format(struct trace_iterator *iter)
char *trace_iter_expand_format(struct trace_iterator *iter)
{
	char *tmp;

@@ -4446,8 +4446,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event)
	if (event) {
		if (tr->trace_flags & TRACE_ITER_FIELDS)
			return print_event_fields(iter, event);
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

+2 −0
Original line number Diff line number Diff line
@@ -619,6 +619,7 @@ bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
			 va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);

int trace_empty(struct trace_iterator *iter);

@@ -1199,6 +1200,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		C(HEX,			"hex"),			\
		C(BIN,			"bin"),			\
		C(BLOCK,		"block"),		\
		C(FIELDS,		"fields"),		\
		C(PRINTK,		"trace_printk"),	\
		C(ANNOTATE,		"annotate"),		\
		C(USERSTACKTRACE,	"userstacktrace"),	\
+168 −0
Original line number Diff line number Diff line
@@ -808,6 +808,174 @@ EXPORT_SYMBOL_GPL(unregister_trace_event);
 * Standard events
 */

static void print_array(struct trace_iterator *iter, void *pos,
			struct ftrace_event_field *field)
{
	int offset;
	int len;
	int i;

	offset = *(int *)pos & 0xffff;
	len = *(int *)pos >> 16;

	if (field)
		offset += field->offset;

	if (offset + len >= iter->ent_size) {
		trace_seq_puts(&iter->seq, "<OVERFLOW>");
		return;
	}

	for (i = 0; i < len; i++, pos++) {
		if (i)
			trace_seq_putc(&iter->seq, ',');
		trace_seq_printf(&iter->seq, "%02x", *(unsigned char *)pos);
	}
}

static void print_fields(struct trace_iterator *iter, struct trace_event_call *call,
			 struct list_head *head)
{
	struct ftrace_event_field *field;
	int offset;
	int len;
	int ret;
	void *pos;

	list_for_each_entry(field, head, link) {
		trace_seq_printf(&iter->seq, " %s=", field->name);
		if (field->offset + field->size > iter->ent_size) {
			trace_seq_puts(&iter->seq, "<OVERFLOW>");
			continue;
		}
		pos = (void *)iter->ent + field->offset;

		switch (field->filter_type) {
		case FILTER_COMM:
		case FILTER_STATIC_STRING:
			trace_seq_printf(&iter->seq, "%.*s", field->size, (char *)pos);
			break;
		case FILTER_RDYN_STRING:
		case FILTER_DYN_STRING:
			offset = *(int *)pos & 0xffff;
			len = *(int *)pos >> 16;

			if (field->filter_type == FILTER_RDYN_STRING)
				offset += field->offset;

			if (offset + len >= iter->ent_size) {
				trace_seq_puts(&iter->seq, "<OVERFLOW>");
				break;
			}
			pos = (void *)iter->ent + offset;
			trace_seq_printf(&iter->seq, "%.*s", len, (char *)pos);
			break;
		case FILTER_PTR_STRING:
			if (!iter->fmt_size)
				trace_iter_expand_format(iter);
			pos = *(void **)pos;
			ret = strncpy_from_kernel_nofault(iter->fmt, pos,
							  iter->fmt_size);
			if (ret < 0)
				trace_seq_printf(&iter->seq, "(0x%px)", pos);
			else
				trace_seq_printf(&iter->seq, "(0x%px:%s)",
						 pos, iter->fmt);
			break;
		case FILTER_TRACE_FN:
			pos = *(void **)pos;
			trace_seq_printf(&iter->seq, "%pS", pos);
			break;
		case FILTER_CPU:
		case FILTER_OTHER:
			switch (field->size) {
			case 1:
				if (isprint(*(char *)pos)) {
					trace_seq_printf(&iter->seq, "'%c'",
						 *(unsigned char *)pos);
				}
				trace_seq_printf(&iter->seq, "(%d)",
						 *(unsigned char *)pos);
				break;
			case 2:
				trace_seq_printf(&iter->seq, "0x%x (%d)",
						 *(unsigned short *)pos,
						 *(unsigned short *)pos);
				break;
			case 4:
				/* dynamic array info is 4 bytes */
				if (strstr(field->type, "__data_loc")) {
					print_array(iter, pos, NULL);
					break;
				}

				if (strstr(field->type, "__rel_loc")) {
					print_array(iter, pos, field);
					break;
				}

				trace_seq_printf(&iter->seq, "0x%x (%d)",
						 *(unsigned int *)pos,
						 *(unsigned int *)pos);
				break;
			case 8:
				trace_seq_printf(&iter->seq, "0x%llx (%lld)",
						 *(unsigned long long *)pos,
						 *(unsigned long long *)pos);
				break;
			default:
				trace_seq_puts(&iter->seq, "<INVALID-SIZE>");
				break;
			}
			break;
		default:
			trace_seq_puts(&iter->seq, "<INVALID-TYPE>");
		}
	}
	trace_seq_putc(&iter->seq, '\n');
}

enum print_line_t print_event_fields(struct trace_iterator *iter,
				     struct trace_event *event)
{
	struct trace_event_call *call;
	struct list_head *head;

	/* ftrace defined events have separate call structures */
	if (event->type <= __TRACE_LAST_TYPE) {
		bool found = false;

		down_read(&trace_event_sem);
		list_for_each_entry(call, &ftrace_events, list) {
			if (call->event.type == event->type) {
				found = true;
				break;
			}
			/* No need to search all events */
			if (call->event.type > __TRACE_LAST_TYPE)
				break;
		}
		up_read(&trace_event_sem);
		if (!found) {
			trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type);
			goto out;
		}
	} else {
		call = container_of(event, struct trace_event_call, event);
	}
	head = trace_get_fields(call);

	trace_seq_printf(&iter->seq, "%s:", trace_event_name(call));

	if (head && !list_empty(head))
		print_fields(iter, call, head);
	else
		trace_seq_puts(&iter->seq, "No fields found\n");

 out:
	return trace_handle_return(&iter->seq);
}

enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
				  struct trace_event *event)
{
+2 −0
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
extern enum print_line_t print_event_fields(struct trace_iterator *iter,
					    struct trace_event *event);

extern void trace_event_read_lock(void);
extern void trace_event_read_unlock(void);