Commit 4ddd6375 authored by Paolo Abeni's avatar Paolo Abeni
Browse files

Merge branch 'net-rps-rfs-improvements'

Eric Dumazet says:

====================
net: rps/rfs improvements

Jason Xing attempted to optimize napi_schedule_rps() by avoiding
unneeded NET_RX_SOFTIRQ raises: [1], [2]

This is quite complex to implement properly. I chose to implement
the idea, and added a similar optimization in ____napi_schedule().

Overall, in an intensive RPC workload, with 32 TX/RX queues with RFS
I was able to observe a ~10% reduction of NET_RX_SOFTIRQ
invocations.

While this had no impact on throughput or cpu costs on this synthetic
benchmark, we know that raising NET_RX_SOFTIRQ from a softirq handler
can force __do_softirq() to wake up ksoftirqd when need_resched() is true.
This can have a latency impact on stressed hosts.

[1] https://lore.kernel.org/lkml/20230325152417.5403-1-kerneljasonxing@gmail.com/
[2] https://lore.kernel.org/netdev/20230328142112.12493-1-kerneljasonxing@gmail.com/
====================

Link: https://lore.kernel.org/r/20230328235021.1048163-1-edumazet@google.com


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 7079d5e6 8b43fd3d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3188,6 +3188,7 @@ struct softnet_data {
#ifdef CONFIG_RPS
	struct softnet_data	*rps_ipi_list;
#endif
	bool			in_net_rx_action;
#ifdef CONFIG_NET_FLOW_LIMIT
	struct sd_flow_limit __rcu *flow_limit;
#endif
+36 −10
Original line number Diff line number Diff line
@@ -4360,6 +4360,10 @@ static inline void ____napi_schedule(struct softnet_data *sd,
	}

	list_add_tail(&napi->poll_list, &sd->poll_list);
	/* If not called from net_rx_action()
	 * we have to raise NET_RX_SOFTIRQ.
	 */
	if (!sd->in_net_rx_action)
		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

@@ -4582,11 +4586,16 @@ static void trigger_rx_softirq(void *data)
}

/*
 * Check if this softnet_data structure is another cpu one
 * If yes, queue it to our IPI list and return 1
 * If no, return 0
 * After we queued a packet into sd->input_pkt_queue,
 * we need to make sure this queue is serviced soon.
 *
 * - If this is another cpu queue, link it to our rps_ipi_list,
 *   and make sure we will process rps_ipi_list from net_rx_action().
 *
 * - If this is our own queue, NAPI schedule our backlog.
 *   Note that this also raises NET_RX_SOFTIRQ.
 */
static int napi_schedule_rps(struct softnet_data *sd)
static void napi_schedule_rps(struct softnet_data *sd)
{
	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);

@@ -4595,12 +4604,15 @@ static int napi_schedule_rps(struct softnet_data *sd)
		sd->rps_ipi_next = mysd->rps_ipi_list;
		mysd->rps_ipi_list = sd;

		/* If not called from net_rx_action()
		 * we have to raise NET_RX_SOFTIRQ.
		 */
		if (!mysd->in_net_rx_action)
			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
		return 1;
		return;
	}
#endif /* CONFIG_RPS */
	__napi_schedule_irqoff(&mysd->backlog);
	return 0;
}

#ifdef CONFIG_NET_FLOW_LIMIT
@@ -6640,6 +6652,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
	LIST_HEAD(list);
	LIST_HEAD(repoll);

start:
	sd->in_net_rx_action = true;
	local_irq_disable();
	list_splice_init(&sd->poll_list, &list);
	local_irq_enable();
@@ -6650,8 +6664,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
		skb_defer_free_flush(sd);

		if (list_empty(&list)) {
			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
			if (list_empty(&repoll)) {
				sd->in_net_rx_action = false;
				barrier();
				/* We need to check if ____napi_schedule()
				 * had refilled poll_list while
				 * sd->in_net_rx_action was true.
				 */
				if (!list_empty(&sd->poll_list))
					goto start;
				if (!sd_has_rps_ipi_waiting(sd))
					goto end;
			}
			break;
		}

@@ -6676,6 +6700,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
	list_splice(&list, &sd->poll_list);
	if (!list_empty(&sd->poll_list))
		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	else
		sd->in_net_rx_action = false;

	net_rps_action_and_irq_enable(sd);
end:;