Commit 29863d41 authored by Wei Wang, committed by David S. Miller

net: implement threaded-able napi poll loop support



This patch allows running each napi poll loop inside its own
kernel thread.
The kthread is created during netif_napi_add() if dev->threaded
is set, and threaded mode is enabled in napi_enable(). A follow-up
patch will provide a way to set dev->threaded and enable threaded
mode without a device up/down cycle.
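
As an illustration only (not part of this patch), a driver that wants
threaded NAPI would set dev->threaded before registering its NAPI
context; foo_priv, foo_poll and foo_open below are hypothetical names:

	/* Hypothetical driver open routine, sketching the opt-in order:
	 * dev->threaded must be set before netif_napi_add() so the
	 * per-napi kthread gets created, and napi_enable() then sets
	 * NAPI_STATE_THREADED.
	 */
	static int foo_open(struct net_device *dev)
	{
		struct foo_priv *priv = netdev_priv(dev);

		dev->threaded = 1;
		netif_napi_add(dev, &priv->napi, foo_poll, NAPI_POLL_WEIGHT);
		napi_enable(&priv->napi);
		return 0;
	}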

Once threaded mode is enabled and the kthread is started,
napi_schedule() wakes up that thread instead of raising the
softirq.
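
Drivers do not need to change how they schedule NAPI: napi_schedule()
is still called, typically from the interrupt handler, and the choice
between waking the kthread and raising NET_RX_SOFTIRQ is made inside
____napi_schedule() (see the hunk below). A hypothetical handler:

	/* Hypothetical IRQ handler: identical whether threaded mode is
	 * on or off; ____napi_schedule() picks the wake-up mechanism.
	 */
	static irqreturn_t foo_interrupt(int irq, void *data)
	{
		struct foo_priv *priv = data;

		/* Hypothetical helper masking further RX interrupts. */
		foo_disable_rx_irq(priv);
		napi_schedule(&priv->napi);
		return IRQ_HANDLED;
	}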

The threaded poll loop behaves much like net_rx_action, but it
does not have to manipulate local irqs and uses an explicit
scheduling point based on netdev_budget.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 898f8015
include/linux/netdevice.h  +7 −14
@@ -347,6 +347,7 @@ struct napi_struct {
	struct list_head	dev_list;
	struct hlist_node	napi_hash_node;
	unsigned int		napi_id;
	struct task_struct	*thread;
};

enum {
@@ -358,6 +359,7 @@ enum {
	NAPI_STATE_NO_BUSY_POLL,	/* Do not add in napi_hash, no busy polling */
	NAPI_STATE_IN_BUSY_POLL,	/* sk_busy_loop() owns this NAPI */
	NAPI_STATE_PREFER_BUSY_POLL,	/* prefer busy-polling over softirq processing*/
	NAPI_STATE_THREADED,		/* The poll is performed inside its own thread*/
};

enum {
@@ -369,6 +371,7 @@ enum {
	NAPIF_STATE_NO_BUSY_POLL	= BIT(NAPI_STATE_NO_BUSY_POLL),
	NAPIF_STATE_IN_BUSY_POLL	= BIT(NAPI_STATE_IN_BUSY_POLL),
	NAPIF_STATE_PREFER_BUSY_POLL	= BIT(NAPI_STATE_PREFER_BUSY_POLL),
	NAPIF_STATE_THREADED		= BIT(NAPI_STATE_THREADED),
};

enum gro_result {
@@ -503,20 +506,7 @@ static inline bool napi_complete(struct napi_struct *n)
 */
void napi_disable(struct napi_struct *n);

/**
 *	napi_enable - enable NAPI scheduling
 *	@n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
static inline void napi_enable(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	smp_mb__before_atomic();
	clear_bit(NAPI_STATE_SCHED, &n->state);
	clear_bit(NAPI_STATE_NPSVC, &n->state);
}
void napi_enable(struct napi_struct *n);

/**
 *	napi_synchronize - wait until NAPI is not running
@@ -1827,6 +1817,8 @@ enum netdev_priv_flags {
 *
 *	@wol_enabled:	Wake-on-LAN is enabled
 *
 *	@threaded:	napi threaded mode is enabled
 *
 *	@net_notifier_list:	List of per-net netdev notifier block
 *				that follow this device when it is moved
 *				to another network namespace.
@@ -2145,6 +2137,7 @@ struct net_device {
	struct lock_class_key	*qdisc_running_key;
	bool			proto_down;
	unsigned		wol_enabled:1;
	unsigned		threaded:1;

	struct list_head	net_notifier_list;

net/core/dev.c  +112 −0
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);

static int napi_threaded_poll(void *data);

static int napi_kthread_create(struct napi_struct *n)
{
	int err = 0;

	/* Create and wake up the kthread once to put it in
	 * TASK_INTERRUPTIBLE mode to avoid the blocked task
	 * warning and work with loadavg.
	 */
	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);
	if (IS_ERR(n->thread)) {
		err = PTR_ERR(n->thread);
		pr_err("kthread_run failed with err %d\n", err);
		n->thread = NULL;
	}

	return err;
}

static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
@@ -4265,6 +4287,21 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	struct task_struct *thread;

	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
		/* Paired with smp_mb__before_atomic() in
		 * napi_enable(). Use READ_ONCE() to guarantee
		 * a complete read on napi->thread. Only call
		 * wake_up_process() when it's not NULL.
		 */
		thread = READ_ONCE(napi->thread);
		if (thread) {
			wake_up_process(thread);
			return;
		}
	}

	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -6728,6 +6765,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
	set_bit(NAPI_STATE_NPSVC, &napi->state);
	list_add_rcu(&napi->dev_list, &dev->napi_list);
	napi_hash_add(napi);
	/* Create kthread for this napi if dev->threaded is set.
	 * Clear dev->threaded if kthread creation failed so that
	 * threaded mode will not be enabled in napi_enable().
	 */
	if (dev->threaded && napi_kthread_create(napi))
		dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);

@@ -6745,9 +6788,28 @@ void napi_disable(struct napi_struct *n)

	clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
	clear_bit(NAPI_STATE_DISABLE, &n->state);
	clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);

/**
 *	napi_enable - enable NAPI scheduling
 *	@n: NAPI context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 */
void napi_enable(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	smp_mb__before_atomic();
	clear_bit(NAPI_STATE_SCHED, &n->state);
	clear_bit(NAPI_STATE_NPSVC, &n->state);
	if (n->dev->threaded && n->thread)
		set_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_enable);

static void flush_gro_hash(struct napi_struct *napi)
{
	int i;
@@ -6773,6 +6835,11 @@ void __netif_napi_del(struct napi_struct *napi)

	flush_gro_hash(napi);
	napi->gro_bitmask = 0;

	if (napi->thread) {
		kthread_stop(napi->thread);
		napi->thread = NULL;
	}
}
EXPORT_SYMBOL(__netif_napi_del);

@@ -6867,6 +6934,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
	return work;
}

static int napi_thread_wait(struct napi_struct *napi)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
			WARN_ON(!list_empty(&napi->poll_list));
			__set_current_state(TASK_RUNNING);
			return 0;
		}

		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return -1;
}

static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	void *have;

	while (!napi_thread_wait(napi)) {
		for (;;) {
			bool repoll = false;

			local_bh_disable();

			have = netpoll_poll_lock(napi);
			__napi_poll(napi, &repoll);
			netpoll_poll_unlock(have);

			__kfree_skb_flush();
			local_bh_enable();

			if (!repoll)
				break;

			cond_resched();
		}
	}
	return 0;
}

static __latent_entropy void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);