123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- From: Wei Wang <[email protected]>
- Date: Mon, 8 Feb 2021 11:34:09 -0800
- Subject: [PATCH] net: implement threaded-able napi poll loop support
- This patch allows running each napi poll loop inside its own
- kernel thread.
- The kthread is created during netif_napi_add() if dev->threaded
- is set, and threaded mode is enabled in napi_enable(). We will
- provide a way to set dev->threaded and enable threaded mode
- without a device up/down cycle in the following patch.
- Once threaded mode is enabled and the kthread is
- started, napi_schedule() will wake up that thread instead
- of scheduling the softirq.
- The threaded poll loop behaves much like net_rx_action,
- but it does not have to manipulate local irqs and uses
- an explicit scheduling point based on netdev_budget.
- Co-developed-by: Paolo Abeni <[email protected]>
- Signed-off-by: Paolo Abeni <[email protected]>
- Co-developed-by: Hannes Frederic Sowa <[email protected]>
- Signed-off-by: Hannes Frederic Sowa <[email protected]>
- Co-developed-by: Jakub Kicinski <[email protected]>
- Signed-off-by: Jakub Kicinski <[email protected]>
- Signed-off-by: Wei Wang <[email protected]>
- Reviewed-by: Alexander Duyck <[email protected]>
- Signed-off-by: David S. Miller <[email protected]>
- ---
- --- a/include/linux/netdevice.h
- +++ b/include/linux/netdevice.h
- @@ -347,6 +347,7 @@ struct napi_struct {
- struct list_head dev_list;
- struct hlist_node napi_hash_node;
- unsigned int napi_id;
- + struct task_struct *thread;
- };
-
- enum {
- @@ -357,6 +358,7 @@ enum {
- NAPI_STATE_LISTED, /* NAPI added to system lists */
- NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
- NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
- + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
- };
-
- enum {
- @@ -367,6 +369,7 @@ enum {
- NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
- NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
- NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
- + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
- };
-
- enum gro_result {
- @@ -497,20 +500,7 @@ static inline bool napi_complete(struct
- */
- void napi_disable(struct napi_struct *n);
-
- -/**
- - * napi_enable - enable NAPI scheduling
- - * @n: NAPI context
- - *
- - * Resume NAPI from being scheduled on this context.
- - * Must be paired with napi_disable.
- - */
- -static inline void napi_enable(struct napi_struct *n)
- -{
- - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- - smp_mb__before_atomic();
- - clear_bit(NAPI_STATE_SCHED, &n->state);
- - clear_bit(NAPI_STATE_NPSVC, &n->state);
- -}
- +void napi_enable(struct napi_struct *n);
-
- /**
- * napi_synchronize - wait until NAPI is not running
- @@ -1842,6 +1832,8 @@ enum netdev_ml_priv_type {
- *
- * @wol_enabled: Wake-on-LAN is enabled
- *
- + * @threaded: napi threaded mode is enabled
- + *
- * @net_notifier_list: List of per-net netdev notifier block
- * that follow this device when it is moved
- * to another network namespace.
- @@ -2161,6 +2153,7 @@ struct net_device {
- struct lock_class_key *qdisc_running_key;
- bool proto_down;
- unsigned wol_enabled:1;
- + unsigned threaded:1;
-
- struct list_head net_notifier_list;
-
- --- a/net/core/dev.c
- +++ b/net/core/dev.c
- @@ -91,6 +91,7 @@
- #include <linux/etherdevice.h>
- #include <linux/ethtool.h>
- #include <linux/skbuff.h>
- +#include <linux/kthread.h>
- #include <linux/bpf.h>
- #include <linux/bpf_trace.h>
- #include <net/net_namespace.h>
- @@ -1500,6 +1501,27 @@ void netdev_notify_peers(struct net_devi
- }
- EXPORT_SYMBOL(netdev_notify_peers);
-
- +static int napi_threaded_poll(void *data);
- +
- +static int napi_kthread_create(struct napi_struct *n)
- +{
- + int err = 0;
- +
- + /* Create and wake up the kthread once to put it in
- + * TASK_INTERRUPTIBLE mode to avoid the blocked task
- + * warning and work with loadavg.
- + */
- + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
- + n->dev->name, n->napi_id);
- + if (IS_ERR(n->thread)) {
- + err = PTR_ERR(n->thread);
- + pr_err("kthread_run failed with err %d\n", err);
- + n->thread = NULL;
- + }
- +
- + return err;
- +}
- +
- static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
- {
- const struct net_device_ops *ops = dev->netdev_ops;
- @@ -4267,6 +4289,21 @@ int gro_normal_batch __read_mostly = 8;
- static inline void ____napi_schedule(struct softnet_data *sd,
- struct napi_struct *napi)
- {
- + struct task_struct *thread;
- +
- + if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
- + /* Paired with smp_mb__before_atomic() in
- + * napi_enable(). Use READ_ONCE() to guarantee
- + * a complete read on napi->thread. Only call
- + * wake_up_process() when it's not NULL.
- + */
- + thread = READ_ONCE(napi->thread);
- + if (thread) {
- + wake_up_process(thread);
- + return;
- + }
- + }
- +
- list_add_tail(&napi->poll_list, &sd->poll_list);
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- }
- @@ -6758,6 +6795,12 @@ void netif_napi_add(struct net_device *d
- set_bit(NAPI_STATE_NPSVC, &napi->state);
- list_add_rcu(&napi->dev_list, &dev->napi_list);
- napi_hash_add(napi);
- + /* Create kthread for this napi if dev->threaded is set.
- + * Clear dev->threaded if kthread creation failed so that
- + * threaded mode will not be enabled in napi_enable().
- + */
- + if (dev->threaded && napi_kthread_create(napi))
- + dev->threaded = 0;
- }
- EXPORT_SYMBOL(netif_napi_add);
-
- @@ -6774,9 +6817,28 @@ void napi_disable(struct napi_struct *n)
- hrtimer_cancel(&n->timer);
-
- clear_bit(NAPI_STATE_DISABLE, &n->state);
- + clear_bit(NAPI_STATE_THREADED, &n->state);
- }
- EXPORT_SYMBOL(napi_disable);
-
- +/**
- + * napi_enable - enable NAPI scheduling
- + * @n: NAPI context
- + *
- + * Resume NAPI from being scheduled on this context.
- + * Must be paired with napi_disable.
- + */
- +void napi_enable(struct napi_struct *n)
- +{
- + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- + smp_mb__before_atomic();
- + clear_bit(NAPI_STATE_SCHED, &n->state);
- + clear_bit(NAPI_STATE_NPSVC, &n->state);
- + if (n->dev->threaded && n->thread)
- + set_bit(NAPI_STATE_THREADED, &n->state);
- +}
- +EXPORT_SYMBOL(napi_enable);
- +
- static void flush_gro_hash(struct napi_struct *napi)
- {
- int i;
- @@ -6802,6 +6864,11 @@ void __netif_napi_del(struct napi_struct
-
- flush_gro_hash(napi);
- napi->gro_bitmask = 0;
- +
- + if (napi->thread) {
- + kthread_stop(napi->thread);
- + napi->thread = NULL;
- + }
- }
- EXPORT_SYMBOL(__netif_napi_del);
-
- @@ -6883,6 +6950,51 @@ static int napi_poll(struct napi_struct
- return work;
- }
-
- +static int napi_thread_wait(struct napi_struct *napi)
- +{
- + set_current_state(TASK_INTERRUPTIBLE);
- +
- + while (!kthread_should_stop() && !napi_disable_pending(napi)) {
- + if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
- + WARN_ON(!list_empty(&napi->poll_list));
- + __set_current_state(TASK_RUNNING);
- + return 0;
- + }
- +
- + schedule();
- + set_current_state(TASK_INTERRUPTIBLE);
- + }
- + __set_current_state(TASK_RUNNING);
- + return -1;
- +}
- +
- +static int napi_threaded_poll(void *data)
- +{
- + struct napi_struct *napi = data;
- + void *have;
- +
- + while (!napi_thread_wait(napi)) {
- + for (;;) {
- + bool repoll = false;
- +
- + local_bh_disable();
- +
- + have = netpoll_poll_lock(napi);
- + __napi_poll(napi, &repoll);
- + netpoll_poll_unlock(have);
- +
- + __kfree_skb_flush();
- + local_bh_enable();
- +
- + if (!repoll)
- + break;
- +
- + cond_resched();
- + }
- + }
- + return 0;
- +}
- +
- static __latent_entropy void net_rx_action(struct softirq_action *h)
- {
- struct softnet_data *sd = this_cpu_ptr(&softnet_data);
|