2021-03-14 22:53:35 +00:00
|
|
|
From: Wei Wang <weiwan@google.com>
|
|
|
|
Date: Mon, 8 Feb 2021 11:34:09 -0800
|
|
|
|
Subject: [PATCH] net: implement threaded-able napi poll loop support
|
|
|
|
|
|
|
|
This patch allows running each napi poll loop inside its own
|
|
|
|
kernel thread.
|
|
|
|
The kthread is created during netif_napi_add() if dev->threaded
|
|
|
|
is set. And threaded mode is enabled in napi_enable(). We will
|
|
|
|
provide a way to set dev->threaded and enable threaded mode
|
|
|
|
without a device up/down in the following patch.
|
|
|
|
|
|
|
|
Once threaded mode is enabled and the kthread is
|
|
|
|
started, napi_schedule() will wake up that thread instead
|
|
|
|
of scheduling the softirq.
|
|
|
|
|
|
|
|
The threaded poll loop behaves much like net_rx_action(),
|
|
|
|
but it does not have to manipulate local irqs and uses
|
|
|
|
an explicit scheduling point based on netdev_budget.
|
|
|
|
|
|
|
|
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
|
|
|
|
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
|
|
|
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
|
|
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
|
|
|
|
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
|
|
|
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
|
|
|
Signed-off-by: Wei Wang <weiwan@google.com>
|
|
|
|
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
|
|
|
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
|
|
---
|
|
|
|
|
|
|
|
--- a/include/linux/netdevice.h
|
|
|
|
+++ b/include/linux/netdevice.h
|
|
|
|
@@ -347,6 +347,7 @@ struct napi_struct {
|
|
|
|
struct list_head dev_list;
|
|
|
|
struct hlist_node napi_hash_node;
|
|
|
|
unsigned int napi_id;
|
|
|
|
+ struct task_struct *thread;
|
|
|
|
};
|
|
|
|
|
|
|
|
enum {
|
|
|
|
@@ -357,6 +358,7 @@ enum {
|
|
|
|
NAPI_STATE_LISTED, /* NAPI added to system lists */
|
|
|
|
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
|
|
|
|
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
|
|
|
|
+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
|
|
|
|
};
|
|
|
|
|
|
|
|
enum {
|
|
|
|
@@ -367,6 +369,7 @@ enum {
|
|
|
|
NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
|
|
|
|
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
|
|
|
|
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
|
|
|
|
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
|
|
|
|
};
|
|
|
|
|
|
|
|
enum gro_result {
|
|
|
|
@@ -497,20 +500,7 @@ static inline bool napi_complete(struct
|
|
|
|
*/
|
|
|
|
void napi_disable(struct napi_struct *n);
|
|
|
|
|
|
|
|
-/**
|
|
|
|
- * napi_enable - enable NAPI scheduling
|
|
|
|
- * @n: NAPI context
|
|
|
|
- *
|
|
|
|
- * Resume NAPI from being scheduled on this context.
|
|
|
|
- * Must be paired with napi_disable.
|
|
|
|
- */
|
|
|
|
-static inline void napi_enable(struct napi_struct *n)
|
|
|
|
-{
|
|
|
|
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
|
|
|
- smp_mb__before_atomic();
|
|
|
|
- clear_bit(NAPI_STATE_SCHED, &n->state);
|
|
|
|
- clear_bit(NAPI_STATE_NPSVC, &n->state);
|
|
|
|
-}
|
|
|
|
+void napi_enable(struct napi_struct *n);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* napi_synchronize - wait until NAPI is not running
|
2021-04-08 20:27:12 +00:00
|
|
|
@@ -1842,6 +1832,8 @@ enum netdev_ml_priv_type {
|
2021-03-14 22:53:35 +00:00
|
|
|
*
|
|
|
|
* @wol_enabled: Wake-on-LAN is enabled
|
|
|
|
*
|
|
|
|
+ * @threaded: napi threaded mode is enabled
|
|
|
|
+ *
|
|
|
|
* @net_notifier_list: List of per-net netdev notifier block
|
|
|
|
* that follow this device when it is moved
|
|
|
|
* to another network namespace.
|
2021-04-08 20:27:12 +00:00
|
|
|
@@ -2161,6 +2153,7 @@ struct net_device {
|
2021-03-14 22:53:35 +00:00
|
|
|
struct lock_class_key *qdisc_running_key;
|
|
|
|
bool proto_down;
|
|
|
|
unsigned wol_enabled:1;
|
|
|
|
+ unsigned threaded:1;
|
|
|
|
|
|
|
|
struct list_head net_notifier_list;
|
|
|
|
|
|
|
|
--- a/net/core/dev.c
|
|
|
|
+++ b/net/core/dev.c
|
|
|
|
@@ -91,6 +91,7 @@
|
|
|
|
#include <linux/etherdevice.h>
|
|
|
|
#include <linux/ethtool.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
+#include <linux/kthread.h>
|
|
|
|
#include <linux/bpf.h>
|
|
|
|
#include <linux/bpf_trace.h>
|
|
|
|
#include <net/net_namespace.h>
|
2021-03-30 21:01:27 +00:00
|
|
|
@@ -1500,6 +1501,27 @@ void netdev_notify_peers(struct net_devi
|
2021-03-14 22:53:35 +00:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(netdev_notify_peers);
|
|
|
|
|
|
|
|
+static int napi_threaded_poll(void *data);
|
|
|
|
+
|
|
|
|
+static int napi_kthread_create(struct napi_struct *n)
|
|
|
|
+{
|
|
|
|
+ int err = 0;
|
|
|
|
+
|
|
|
|
+ /* Create and wake up the kthread once to put it in
|
|
|
|
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
|
|
|
|
+ * warning and work with loadavg.
|
|
|
|
+ */
|
|
|
|
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
|
|
|
|
+ n->dev->name, n->napi_id);
|
|
|
|
+ if (IS_ERR(n->thread)) {
|
|
|
|
+ err = PTR_ERR(n->thread);
|
|
|
|
+ pr_err("kthread_run failed with err %d\n", err);
|
|
|
|
+ n->thread = NULL;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return err;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
const struct net_device_ops *ops = dev->netdev_ops;
|
2021-06-03 10:12:02 +00:00
|
|
|
@@ -4255,6 +4277,21 @@ int gro_normal_batch __read_mostly = 8;
|
2021-03-14 22:53:35 +00:00
|
|
|
static inline void ____napi_schedule(struct softnet_data *sd,
|
|
|
|
struct napi_struct *napi)
|
|
|
|
{
|
|
|
|
+ struct task_struct *thread;
|
|
|
|
+
|
|
|
|
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
|
|
|
|
+ /* Paired with smp_mb__before_atomic() in
|
|
|
|
+ * napi_enable(). Use READ_ONCE() to guarantee
|
|
|
|
+ * a complete read on napi->thread. Only call
|
|
|
|
+ * wake_up_process() when it's not NULL.
|
|
|
|
+ */
|
|
|
|
+ thread = READ_ONCE(napi->thread);
|
|
|
|
+ if (thread) {
|
|
|
|
+ wake_up_process(thread);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
list_add_tail(&napi->poll_list, &sd->poll_list);
|
|
|
|
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
|
|
|
|
}
|
2021-07-28 14:53:41 +00:00
|
|
|
@@ -6746,6 +6783,12 @@ void netif_napi_add(struct net_device *d
|
2021-03-14 22:53:35 +00:00
|
|
|
set_bit(NAPI_STATE_NPSVC, &napi->state);
|
|
|
|
list_add_rcu(&napi->dev_list, &dev->napi_list);
|
|
|
|
napi_hash_add(napi);
|
|
|
|
+ /* Create kthread for this napi if dev->threaded is set.
|
|
|
|
+ * Clear dev->threaded if kthread creation failed so that
|
|
|
|
+ * threaded mode will not be enabled in napi_enable().
|
|
|
|
+ */
|
|
|
|
+ if (dev->threaded && napi_kthread_create(napi))
|
|
|
|
+ dev->threaded = 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(netif_napi_add);
|
|
|
|
|
2021-07-28 14:53:41 +00:00
|
|
|
@@ -6762,9 +6805,28 @@ void napi_disable(struct napi_struct *n)
|
2021-03-14 22:53:35 +00:00
|
|
|
hrtimer_cancel(&n->timer);
|
|
|
|
|
|
|
|
clear_bit(NAPI_STATE_DISABLE, &n->state);
|
|
|
|
+ clear_bit(NAPI_STATE_THREADED, &n->state);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(napi_disable);
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * napi_enable - enable NAPI scheduling
|
|
|
|
+ * @n: NAPI context
|
|
|
|
+ *
|
|
|
|
+ * Resume NAPI from being scheduled on this context.
|
|
|
|
+ * Must be paired with napi_disable.
|
|
|
|
+ */
|
|
|
|
+void napi_enable(struct napi_struct *n)
|
|
|
|
+{
|
|
|
|
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
|
|
|
|
+ smp_mb__before_atomic();
|
|
|
|
+ clear_bit(NAPI_STATE_SCHED, &n->state);
|
|
|
|
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
|
|
|
|
+ if (n->dev->threaded && n->thread)
|
|
|
|
+ set_bit(NAPI_STATE_THREADED, &n->state);
|
|
|
|
+}
|
|
|
|
+EXPORT_SYMBOL(napi_enable);
|
|
|
|
+
|
|
|
|
static void flush_gro_hash(struct napi_struct *napi)
|
|
|
|
{
|
|
|
|
int i;
|
2021-07-28 14:53:41 +00:00
|
|
|
@@ -6790,6 +6852,11 @@ void __netif_napi_del(struct napi_struct
|
2021-03-14 22:53:35 +00:00
|
|
|
|
|
|
|
flush_gro_hash(napi);
|
|
|
|
napi->gro_bitmask = 0;
|
|
|
|
+
|
|
|
|
+ if (napi->thread) {
|
|
|
|
+ kthread_stop(napi->thread);
|
|
|
|
+ napi->thread = NULL;
|
|
|
|
+ }
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(__netif_napi_del);
|
|
|
|
|
2021-07-28 14:53:41 +00:00
|
|
|
@@ -6871,6 +6938,51 @@ static int napi_poll(struct napi_struct
|
2021-03-14 22:53:35 +00:00
|
|
|
return work;
|
|
|
|
}
|
|
|
|
|
|
|
|
+static int napi_thread_wait(struct napi_struct *napi)
|
|
|
|
+{
|
|
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
|
|
+
|
|
|
|
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
|
|
|
|
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
|
|
|
|
+ WARN_ON(!list_empty(&napi->poll_list));
|
|
|
|
+ __set_current_state(TASK_RUNNING);
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ schedule();
|
|
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
|
|
+ }
|
|
|
|
+ __set_current_state(TASK_RUNNING);
|
|
|
|
+ return -1;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static int napi_threaded_poll(void *data)
|
|
|
|
+{
|
|
|
|
+ struct napi_struct *napi = data;
|
|
|
|
+ void *have;
|
|
|
|
+
|
|
|
|
+ while (!napi_thread_wait(napi)) {
|
|
|
|
+ for (;;) {
|
|
|
|
+ bool repoll = false;
|
|
|
|
+
|
|
|
|
+ local_bh_disable();
|
|
|
|
+
|
|
|
|
+ have = netpoll_poll_lock(napi);
|
|
|
|
+ __napi_poll(napi, &repoll);
|
|
|
|
+ netpoll_poll_unlock(have);
|
|
|
|
+
|
|
|
|
+ __kfree_skb_flush();
|
|
|
|
+ local_bh_enable();
|
|
|
|
+
|
|
|
|
+ if (!repoll)
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ cond_resched();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static __latent_entropy void net_rx_action(struct softirq_action *h)
|
|
|
|
{
|
|
|
|
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
|