Commit 6bb218b5 authored by David S. Miller
Browse files

Merge branch 'napi_threaded_poll-enhancements'

Eric Dumazet says:

====================
net: give napi_threaded_poll() some love

There is interest in reverting commit 4cd13c21
("softirq: Let ksoftirqd do its job") and using the
napi_threaded_poll() mode instead.

https://lore.kernel.org/netdev/140f61e2e1fcb8cf53619709046e312e343b53ca.camel@redhat.com/T/#m8a8f5b09844adba157ad0d22fc1233d97013de50



Before doing so, make sure napi_threaded_poll() benefits
from recent core stack improvements, to further reduce
softirq triggers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 19c60fde 87eff2ec
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -3194,7 +3194,10 @@ struct softnet_data {
#ifdef CONFIG_RPS
	struct softnet_data	*rps_ipi_list;
#endif

	bool			in_net_rx_action;
	bool			in_napi_threaded_poll;

#ifdef CONFIG_NET_FLOW_LIMIT
	struct sd_flow_limit __rcu *flow_limit;
#endif
+34 −23
Original line number Diff line number Diff line
@@ -4603,10 +4603,10 @@ static void napi_schedule_rps(struct softnet_data *sd)
		sd->rps_ipi_next = mysd->rps_ipi_list;
		mysd->rps_ipi_list = sd;

		/* If not called from net_rx_action()
		/* If not called from net_rx_action() or napi_threaded_poll()
		 * we have to raise NET_RX_SOFTIRQ.
		 */
		if (!mysd->in_net_rx_action)
		if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
		return;
	}
@@ -6598,9 +6598,31 @@ static int napi_thread_wait(struct napi_struct *napi)
	return -1;
}

/* Free every skb queued on this CPU's softnet defer_list.
 *
 * In this (post-change) variant the caller runs with BHs disabled —
 * napi_threaded_poll() invokes it between local_bh_disable() and
 * local_bh_enable() — so a plain spin_lock() on sd->defer_lock suffices;
 * the writer side, skb_attempt_defer_free(), takes the same lock with
 * spin_lock_bh() in this series.
 */
static void skb_defer_free_flush(struct softnet_data *sd)
{
	struct sk_buff *skb, *next;

	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
	if (!READ_ONCE(sd->defer_list))
		return;

	/* Detach the whole list under the lock; free outside the lock. */
	spin_lock(&sd->defer_lock);
	skb = sd->defer_list;
	sd->defer_list = NULL;
	sd->defer_count = 0;
	spin_unlock(&sd->defer_lock);

	/* Walk the detached singly linked list, releasing each skb.
	 * NOTE(review): the second argument (1) is napi_consume_skb()'s
	 * budget hint — presumably signalling NAPI/softirq context; confirm
	 * against the napi_consume_skb() contract.
	 */
	while (skb != NULL) {
		next = skb->next;
		napi_consume_skb(skb, 1);
		skb = next;
	}
}

static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	struct softnet_data *sd;
	void *have;

	while (!napi_thread_wait(napi)) {
@@ -6608,11 +6630,21 @@ static int napi_threaded_poll(void *data)
			bool repoll = false;

			local_bh_disable();
			sd = this_cpu_ptr(&softnet_data);
			sd->in_napi_threaded_poll = true;

			have = netpoll_poll_lock(napi);
			__napi_poll(napi, &repoll);
			netpoll_poll_unlock(have);

			sd->in_napi_threaded_poll = false;
			barrier();

			if (sd_has_rps_ipi_waiting(sd)) {
				local_irq_disable();
				net_rps_action_and_irq_enable(sd);
			}
			skb_defer_free_flush(sd);
			local_bh_enable();

			if (!repoll)
@@ -6624,27 +6656,6 @@ static int napi_threaded_poll(void *data)
	return 0;
}

/* Free every skb queued on this CPU's softnet defer_list.
 *
 * This is the pre-change variant being removed by the diff: it disables
 * hardware interrupts around sd->defer_lock (spin_lock_irq), matching the
 * spin_lock_irqsave() that the writer side, skb_attempt_defer_free(),
 * used before this series.
 */
static void skb_defer_free_flush(struct softnet_data *sd)
{
	struct sk_buff *skb, *next;

	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
	if (!READ_ONCE(sd->defer_list))
		return;

	/* Detach the whole list with IRQs off; free with IRQs back on. */
	spin_lock_irq(&sd->defer_lock);
	skb = sd->defer_list;
	sd->defer_list = NULL;
	sd->defer_count = 0;
	spin_unlock_irq(&sd->defer_lock);

	/* Walk the detached singly linked list, releasing each skb.
	 * NOTE(review): the second argument (1) is napi_consume_skb()'s
	 * budget hint — presumably signalling NAPI/softirq context; confirm
	 * against the napi_consume_skb() contract.
	 */
	while (skb != NULL) {
		next = skb->next;
		napi_consume_skb(skb, 1);
		skb = next;
	}
}

static __latent_entropy void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+5 −3
Original line number Diff line number Diff line
@@ -6870,7 +6870,6 @@ void skb_attempt_defer_free(struct sk_buff *skb)
{
	int cpu = skb->alloc_cpu;
	struct softnet_data *sd;
	unsigned long flags;
	unsigned int defer_max;
	bool kick;

@@ -6881,12 +6880,15 @@ nodefer: __kfree_skb(skb);
		return;
	}

	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
	DEBUG_NET_WARN_ON_ONCE(skb->destructor);

	sd = &per_cpu(softnet_data, cpu);
	defer_max = READ_ONCE(sysctl_skb_defer_max);
	if (READ_ONCE(sd->defer_count) >= defer_max)
		goto nodefer;

	spin_lock_irqsave(&sd->defer_lock, flags);
	spin_lock_bh(&sd->defer_lock);
	/* Send an IPI every time queue reaches half capacity. */
	kick = sd->defer_count == (defer_max >> 1);
	/* Paired with the READ_ONCE() few lines above */
@@ -6895,7 +6897,7 @@ nodefer: __kfree_skb(skb);
	skb->next = sd->defer_list;
	/* Paired with READ_ONCE() in skb_defer_free_flush() */
	WRITE_ONCE(sd->defer_list, skb);
	spin_unlock_irqrestore(&sd->defer_lock, flags);
	spin_unlock_bh(&sd->defer_lock);

	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
	 * if we are unlucky enough (this seems very unlikely).