601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch

From: Wei Wang <[email protected]>
Date: Mon, 8 Feb 2021 11:34:09 -0800
Subject: [PATCH] net: implement threaded-able napi poll loop support

This patch allows running each napi poll loop inside its own
kernel thread.

The kthread is created during netif_napi_add() if dev->threaded
is set, and threaded mode is enabled in napi_enable(). A
follow-up patch will provide a way to set dev->threaded and
enable threaded mode without a device up/down cycle.

Once threaded mode is enabled and the kthread is started,
napi_schedule() wakes up that thread instead of raising the
softirq.

The threaded poll loop behaves much like net_rx_action, but it
does not have to manipulate local irqs and uses an explicit
scheduling point based on netdev_budget.

Co-developed-by: Paolo Abeni <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Co-developed-by: Hannes Frederic Sowa <[email protected]>
Signed-off-by: Hannes Frederic Sowa <[email protected]>
Co-developed-by: Jakub Kicinski <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: Wei Wang <[email protected]>
Reviewed-by: Alexander Duyck <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
---
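As context for the diff below: a driver opts in by setting dev->threaded
before netif_napi_add() (which creates the kthread) and then calling
napi_enable() (which sets NAPI_STATE_THREADED). A minimal sketch of that
opt-in path follows; the mydrv_* names and the private struct layout are
hypothetical and not part of this patch:

/* Hypothetical driver glue, for illustration only. dev->threaded must
 * be set before netif_napi_add() so napi_kthread_create() runs there;
 * napi_enable() then sets NAPI_STATE_THREADED, and ____napi_schedule()
 * wakes the kthread instead of raising NET_RX_SOFTIRQ.
 */
#include <linux/netdevice.h>

struct mydrv_priv {
	struct napi_struct napi;
};

static int mydrv_poll(struct napi_struct *napi, int budget)
{
	int work_done = 0;

	/* ... consume up to @budget received packets ... */

	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}

static void mydrv_setup_rx(struct net_device *dev)
{
	struct mydrv_priv *priv = netdev_priv(dev);

	dev->threaded = 1;	/* request a "napi/<dev>-<id>" kthread */
	netif_napi_add(dev, &priv->napi, mydrv_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->napi);	/* dev->threaded && napi->thread => threaded */
}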
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -347,6 +347,7 @@ struct napi_struct {
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
 	unsigned int		napi_id;
+	struct task_struct	*thread;
 };
 
 enum {
@@ -357,6 +358,7 @@ enum {
 	NAPI_STATE_LISTED,	/* NAPI added to system lists */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
 	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_THREADED,	/* The poll is performed inside its own thread */
 };
 
 enum {
@@ -367,6 +369,7 @@ enum {
 	NAPIF_STATE_LISTED	 = BIT(NAPI_STATE_LISTED),
 	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
 	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_THREADED	 = BIT(NAPI_STATE_THREADED),
 };
 
 enum gro_result {
@@ -497,20 +500,7 @@ static inline bool napi_complete(struct napi_struct *n)
  */
 void napi_disable(struct napi_struct *n);
 
-/**
- *	napi_enable - enable NAPI scheduling
- *	@n: NAPI context
- *
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
- */
-static inline void napi_enable(struct napi_struct *n)
-{
-	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	smp_mb__before_atomic();
-	clear_bit(NAPI_STATE_SCHED, &n->state);
-	clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
 
 /**
  *	napi_synchronize - wait until NAPI is not running
@@ -1842,6 +1832,8 @@ enum netdev_ml_priv_type {
  *
  *	@wol_enabled:	Wake-on-LAN is enabled
  *
+ *	@threaded:	napi threaded mode is enabled
+ *
  *	@net_notifier_list:	List of per-net netdev notifier block
  *				that follow this device when it is moved
  *				to another network namespace.
@@ -2161,6 +2153,7 @@ struct net_device {
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
 	unsigned		wol_enabled:1;
+	unsigned		threaded:1;
 
 	struct list_head	net_notifier_list;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
+#include <linux/kthread.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <net/net_namespace.h>
@@ -1500,6 +1501,27 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+	int err = 0;
+
+	/* Create and wake up the kthread once to put it in
+	 * TASK_INTERRUPTIBLE mode to avoid the blocked task
+	 * warning and work with loadavg.
+	 */
+	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+				n->dev->name, n->napi_id);
+	if (IS_ERR(n->thread)) {
+		err = PTR_ERR(n->thread);
+		pr_err("kthread_run failed with err %d\n", err);
+		n->thread = NULL;
+	}
+
+	return err;
+}
+
 static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -4267,6 +4289,21 @@ int gro_normal_batch __read_mostly = 8;
 static inline void ____napi_schedule(struct softnet_data *sd,
 				     struct napi_struct *napi)
 {
+	struct task_struct *thread;
+
+	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+		/* Paired with smp_mb__before_atomic() in
+		 * napi_enable(). Use READ_ONCE() to guarantee
+		 * a complete read on napi->thread. Only call
+		 * wake_up_process() when it's not NULL.
+		 */
+		thread = READ_ONCE(napi->thread);
+		if (thread) {
+			wake_up_process(thread);
+			return;
+		}
+	}
+
 	list_add_tail(&napi->poll_list, &sd->poll_list);
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
@@ -6758,6 +6795,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 	list_add_rcu(&napi->dev_list, &dev->napi_list);
 	napi_hash_add(napi);
+	/* Create kthread for this napi if dev->threaded is set.
+	 * Clear dev->threaded if kthread creation failed so that
+	 * threaded mode will not be enabled in napi_enable().
+	 */
+	if (dev->threaded && napi_kthread_create(napi))
+		dev->threaded = 0;
 }
 EXPORT_SYMBOL(netif_napi_add);
 
@@ -6774,9 +6817,28 @@ void napi_disable(struct napi_struct *n)
 	hrtimer_cancel(&n->timer);
 
 	clear_bit(NAPI_STATE_DISABLE, &n->state);
+	clear_bit(NAPI_STATE_THREADED, &n->state);
 }
 EXPORT_SYMBOL(napi_disable);
 
+/**
+ *	napi_enable - enable NAPI scheduling
+ *	@n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ */
+void napi_enable(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	smp_mb__before_atomic();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+	clear_bit(NAPI_STATE_NPSVC, &n->state);
+	if (n->dev->threaded && n->thread)
+		set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
 static void flush_gro_hash(struct napi_struct *napi)
 {
 	int i;
@@ -6802,6 +6864,11 @@ void __netif_napi_del(struct napi_struct *napi)
 
 	flush_gro_hash(napi);
 	napi->gro_bitmask = 0;
+
+	if (napi->thread) {
+		kthread_stop(napi->thread);
+		napi->thread = NULL;
+	}
 }
 EXPORT_SYMBOL(__netif_napi_del);
 
@@ -6883,6 +6950,51 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 	return work;
 }
 
+static int napi_thread_wait(struct napi_struct *napi)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+		if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+			WARN_ON(!list_empty(&napi->poll_list));
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+	struct napi_struct *napi = data;
+	void *have;
+
+	while (!napi_thread_wait(napi)) {
+		for (;;) {
+			bool repoll = false;
+
+			local_bh_disable();
+
+			have = netpoll_poll_lock(napi);
+			__napi_poll(napi, &repoll);
+			netpoll_poll_unlock(have);
+
+			__kfree_skb_flush();
+			local_bh_enable();
+
+			if (!repoll)
+				break;
+
+			cond_resched();
+		}
+	}
+	return 0;
+}
+
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
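Teardown is symmetric in the hunks above: napi_disable() clears
NAPI_STATE_THREADED, so later napi_schedule() calls fall back to the
softirq path, and __netif_napi_del() stops and clears napi->thread.
Continuing the hypothetical mydrv_* sketch from above:

static void mydrv_teardown_rx(struct net_device *dev)
{
	struct mydrv_priv *priv = netdev_priv(dev);

	napi_disable(&priv->napi);	/* clears NAPI_STATE_THREADED */
	netif_napi_del(&priv->napi);	/* __netif_napi_del() kthread_stop()s napi->thread */
}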