Commit a8248fc4 authored by Paolo Abeni
Browse files
David Howells says:

====================
Here's the fifth part of patches in the process of moving rxrpc from doing
a lot of its stuff in softirq context to doing it in an I/O thread in
process context and thereby making it easier to support a larger SACK
table.

The full description is in the description for the first part[1] which is
now upstream.  The second and third parts are also upstream[2].  A subset
of the original fourth part[3] got applied as a fix for a race[4].

The fifth part includes some cleanups:

 (1) Miscellaneous trace header cleanups: fix a trace string, display the
     security index in rx_packet rather than displaying the type twice,
     remove some whitespace to make checkpatch happier and remove some
     excess tabulation.

 (2) Convert ->recvmsg_lock to a spinlock as it's only ever locked
     exclusively.

 (3) Make ->ackr_window and ->ackr_nr_unacked non-atomic as they're only
     used in the I/O thread.

 (4) Don't use call->tx_lock to access ->tx_buffer as that is only accessed
     inside the I/O thread.  sendmsg() loads onto ->tx_sendmsg and the I/O
     thread decants from that to the buffer.

 (5) Remove local->defrag_sem as DATA packets are transmitted serially by
     the I/O thread.

 (6) Remove the service connection bundle as it was only used for its
     channel_lock - which has now gone.

And some more significant changes:

 (7) Add a debugging option to allow a delay to be injected into packet
     reception to help investigate the behaviour over longer links than
     just a few cm.

 (8) Generate occasional PING ACKs to probe for RTT information during a
     receive heavy call.

 (9) Simplify the SACK table maintenance and ACK generation.  Now that both
     parts are done in the same thread, there's no possibility of a race
     and no need to try and be cunning to avoid taking a BH spinlock whilst
     copying the SACK table (which in the future will be up to 2K) and no
     need to rotate the copy to fit the ACK packet table.

(10) Use SKB_CONSUMED when freeing received DATA packets (stop dropwatch
     complaining).

* tag 'rxrpc-next-20230131' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  rxrpc: Kill service bundle
  rxrpc: Change rx_packet tracepoint to display securityIndex not type twice
  rxrpc: Show consumed and freed packets as non-dropped in dropwatch
  rxrpc: Remove local->defrag_sem
  rxrpc: Don't lock call->tx_lock to access call->tx_buffer
  rxrpc: Simplify ACK handling
  rxrpc: De-atomic call->ackr_window and call->ackr_nr_unacked
  rxrpc: Generate extra pings for RTT during heavy-receive call
  rxrpc: Allow a delay to be injected into packet reception
  rxrpc: Convert call->recvmsg_lock to a spinlock
  rxrpc: Shrink the tabulation in the rxrpc trace header a bit
  rxrpc: Remove whitespace before ')' in trace header
  rxrpc: Fix trace string
====================

Link: https://lore.kernel.org/all/20230131171227.3912130-1-dhowells@redhat.com/


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 609aa68d 550130a0
Loading
Loading
Loading
Loading
+259 −221
Original line number Diff line number Diff line
@@ -163,7 +163,7 @@
	EM(rxrpc_local_put_for_use,		"PUT for-use ") \
	EM(rxrpc_local_put_kill_conn,		"PUT conn-kil") \
	EM(rxrpc_local_put_peer,		"PUT peer    ") \
	EM(rxrpc_local_put_prealloc_conn,	"PUT conn-pre") \
	EM(rxrpc_local_put_prealloc_peer,	"PUT peer-pre") \
	EM(rxrpc_local_put_release_sock,	"PUT rel-sock") \
	EM(rxrpc_local_stop,			"STOP        ") \
	EM(rxrpc_local_stopped,			"STOPPED     ") \
@@ -360,11 +360,12 @@
	EM(rxrpc_propose_ack_client_tx_end,	"ClTxEnd") \
	EM(rxrpc_propose_ack_input_data,	"DataIn ") \
	EM(rxrpc_propose_ack_input_data_hole,	"DataInH") \
	EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
	EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
	EM(rxrpc_propose_ack_ping_for_lost_ack,	"LostAck") \
	EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
	EM(rxrpc_propose_ack_ping_for_old_rtt,	"OldRtt ") \
	EM(rxrpc_propose_ack_ping_for_params,	"Params ") \
	EM(rxrpc_propose_ack_ping_for_rtt,	"Rtt    ") \
	EM(rxrpc_propose_ack_processing_op,	"ProcOp ") \
	EM(rxrpc_propose_ack_respond_to_ack,	"Rsp2Ack") \
	EM(rxrpc_propose_ack_respond_to_ping,	"Rsp2Png") \
@@ -421,6 +422,13 @@
	EM(RXRPC_ACK_IDLE,			"IDL") \
	E_(RXRPC_ACK__INVALID,			"-?-")

/*
 * Table of SACK trace reasons.  Each entry pairs an rxrpc_sack_* value with
 * the three-character label printed for it.  The list is expanded into
 * enum rxrpc_sack_trace and is also handed to __print_symbolic() by the
 * rxrpc_sack tracepoint.  E_() marks the final entry of the list.
 */
#define rxrpc_sack_traces \
	EM(rxrpc_sack_advance,			"ADV")	\
	EM(rxrpc_sack_fill,			"FIL")	\
	EM(rxrpc_sack_nack,			"NAK")	\
	EM(rxrpc_sack_none,			"---")	\
	E_(rxrpc_sack_oos,			"OOS")

#define rxrpc_completions \
	EM(RXRPC_CALL_SUCCEEDED,		"Succeeded") \
	EM(RXRPC_CALL_REMOTELY_ABORTED,		"RemoteAbort") \
@@ -496,6 +504,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
enum rxrpc_req_ack_trace	{ rxrpc_req_ack_traces } __mode(byte);
enum rxrpc_rtt_rx_trace		{ rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace		{ rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_sack_trace		{ rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace		{ rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace		{ rxrpc_timer_traces } __mode(byte);
enum rxrpc_tx_point		{ rxrpc_tx_points } __mode(byte);
@@ -530,6 +539,7 @@ rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
rxrpc_tx_points;
@@ -742,9 +752,8 @@ TRACE_EVENT(rxrpc_rx_packet,
		      __entry->hdr.epoch, __entry->hdr.cid,
		      __entry->hdr.callNumber, __entry->hdr.serviceId,
		      __entry->hdr.serial, __entry->hdr.seq,
		      __entry->hdr.type, __entry->hdr.flags,
		      __entry->hdr.type <= 15 ?
		      __print_symbolic(__entry->hdr.type, rxrpc_pkts) : "?UNK")
		      __entry->hdr.securityIndex, __entry->hdr.flags,
		      __print_symbolic(__entry->hdr.type, rxrpc_pkts))
	    );

TRACE_EVENT(rxrpc_rx_done,
@@ -1151,7 +1160,8 @@ TRACE_EVENT(rxrpc_receive,
		    __field(enum rxrpc_receive_trace,	why)
		    __field(rxrpc_serial_t,		serial)
		    __field(rxrpc_seq_t,		seq)
		    __field(u64,			window		)
		    __field(rxrpc_seq_t,		window)
		    __field(rxrpc_seq_t,		wtop)
			     ),

	    TP_fast_assign(
@@ -1159,7 +1169,8 @@ TRACE_EVENT(rxrpc_receive,
		    __entry->why = why;
		    __entry->serial = serial;
		    __entry->seq = seq;
		    __entry->window = atomic64_read(&call->ackr_window);
		    __entry->window = call->ackr_window;
		    __entry->wtop = call->ackr_wtop;
			   ),

	    TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x",
@@ -1167,8 +1178,8 @@ TRACE_EVENT(rxrpc_receive,
		      __print_symbolic(__entry->why, rxrpc_receive_traces),
		      __entry->serial,
		      __entry->seq,
		      lower_32_bits(__entry->window),
		      upper_32_bits(__entry->window))
		      __entry->window,
		      __entry->wtop)
	    );

TRACE_EVENT(rxrpc_recvmsg,
@@ -1926,6 +1937,33 @@ TRACE_EVENT(rxrpc_call_poked,
		      __entry->call_debug_id)
	    );

/*
 * rxrpc_sack tracepoint.
 *
 * Records the debug ID of @call, the sequence number @seq, the
 * caller-supplied @sack value and the reason @what (one of the
 * rxrpc_sack_traces entries).
 *
 * Output format: "c=<call debug id> q=<seq> <what label> k=<sack>", with all
 * numeric fields in hex.
 */
TRACE_EVENT(rxrpc_sack,
	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
		     unsigned int sack, enum rxrpc_sack_trace what),

	    TP_ARGS(call, seq, sack, what),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call_debug_id)
		    __field(rxrpc_seq_t,		seq)
		    __field(unsigned int,		sack)
		    __field(enum rxrpc_sack_trace,	what)
			     ),

	    /* Only the call's debug ID is copied out of @call. */
	    TP_fast_assign(
		    __entry->call_debug_id = call->debug_id;
		    __entry->seq = seq;
		    __entry->sack = sack;
		    __entry->what = what;
			   ),

	    TP_printk("c=%08x q=%08x %s k=%x",
		      __entry->call_debug_id,
		      __entry->seq,
		      __print_symbolic(__entry->what, rxrpc_sack_traces),
		      __entry->sack)
	    );

#undef EM
#undef E_

+9 −0
Original line number Diff line number Diff line
@@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS
	  Say Y here to inject packet loss by discarding some received and some
	  transmitted packets.

config AF_RXRPC_INJECT_RX_DELAY
	bool "Inject delay into packet reception"
	depends on SYSCTL
	help
	  Say Y here to inject a delay into packet reception, allowing an
	  extended RTT time to be modelled.  The delay can be configured using
	  /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of
	  milliseconds up to 0.5s (note that the granularity is actually in
	  jiffies).

config AF_RXRPC_DEBUG
	bool "RxRPC dynamic debugging"
+1 −1
Original line number Diff line number Diff line
@@ -786,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
	INIT_LIST_HEAD(&rx->sock_calls);
	INIT_LIST_HEAD(&rx->to_be_accepted);
	INIT_LIST_HEAD(&rx->recvmsg_q);
	rwlock_init(&rx->recvmsg_lock);
	spin_lock_init(&rx->recvmsg_lock);
	rwlock_init(&rx->call_lock);
	memset(&rx->srx, 0, sizeof(rx->srx));

+11 −4
Original line number Diff line number Diff line
@@ -149,7 +149,7 @@ struct rxrpc_sock {
	struct list_head	sock_calls;	/* List of calls owned by this socket */
	struct list_head	to_be_accepted;	/* calls awaiting acceptance */
	struct list_head	recvmsg_q;	/* Calls awaiting recvmsg's attention  */
	rwlock_t		recvmsg_lock;	/* Lock for recvmsg_q */
	spinlock_t		recvmsg_lock;	/* Lock for recvmsg_q */
	struct key		*key;		/* security for this socket */
	struct key		*securities;	/* list of server security descriptors */
	struct rb_root		calls;		/* User ID -> call mapping */
@@ -284,7 +284,9 @@ struct rxrpc_local {
	struct task_struct	*io_thread;
	struct completion	io_thread_ready; /* Indication that the I/O thread started */
	struct rxrpc_sock	*service;	/* Service(s) listening on this endpoint */
	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
	struct sk_buff_head	rx_delay_queue;	/* Delay injection queue */
#endif
	struct sk_buff_head	rx_queue;	/* Received packets */
	struct list_head	conn_attend_q;	/* Conns requiring immediate attention */
	struct list_head	call_attend_q;	/* Calls requiring immediate attention */
@@ -688,9 +690,11 @@ struct rxrpc_call {

	/* Receive-phase ACK management (ACKs we send). */
	u8			ackr_reason;	/* reason to ACK */
	u16			ackr_sack_base;	/* Starting slot in SACK table ring */
	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
	atomic64_t		ackr_window;	/* Base (in LSW) and top (in MSW) of SACK window */
	atomic_t		ackr_nr_unacked; /* Number of unacked packets */
	rxrpc_seq_t		ackr_window;	/* Base of SACK window */
	rxrpc_seq_t		ackr_wtop;	/* Top of SACK window */
	unsigned int		ackr_nr_unacked; /* Number of unacked packets */
	atomic_t		ackr_nr_consumed; /* Number of packets needing hard ACK */
	struct {
#define RXRPC_SACK_SIZE 256
@@ -1109,6 +1113,9 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
extern unsigned long rxrpc_inject_rx_delay;
#endif

/*
 * net_ns.c
+1 −1
Original line number Diff line number Diff line
@@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
	tail = b->peer_backlog_tail;
	while (CIRC_CNT(head, tail, size) > 0) {
		struct rxrpc_peer *peer = b->peer_backlog[tail];
		rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
		rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
		kfree(peer);
		tail = (tail + 1) & (size - 1);
	}
Loading