603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. From: Wei Wang <[email protected]>
  2. Date: Mon, 1 Mar 2021 17:21:13 -0800
  3. Subject: [PATCH] net: fix race between napi kthread mode and busy poll
  4. Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
  5. determine if the kthread owns this napi and could call napi->poll() on
  6. it. However, if socket busy poll is enabled, it is possible that the
  7. busy poll thread grabs this SCHED bit (after the previous napi->poll()
  8. invokes napi_complete_done() and clears SCHED bit) and tries to poll
  9. on the same napi. napi_disable() could grab the SCHED bit as well.
  10. This patch tries to fix this race by adding a new bit
  11. NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
  12. ____napi_schedule() if the threaded mode is enabled, and gets cleared
  13. in napi_complete_done(), and we only poll the napi in kthread if this
  14. bit is set. This helps distinguish the ownership of the napi between
  15. kthread and other scenarios and fixes the race issue.
  16. Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
  17. Reported-by: Martin Zaharinov <[email protected]>
  18. Suggested-by: Jakub Kicinski <[email protected]>
  19. Signed-off-by: Wei Wang <[email protected]>
  20. Cc: Alexander Duyck <[email protected]>
  21. Cc: Eric Dumazet <[email protected]>
  22. Cc: Paolo Abeni <[email protected]>
  23. Cc: Hannes Frederic Sowa <[email protected]>
  24. ---
  25. --- a/include/linux/netdevice.h
  26. +++ b/include/linux/netdevice.h
  27. @@ -359,6 +359,7 @@ enum {
  28. NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
  29. NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
  30. NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
  31. + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
  32. };
  33. enum {
  34. @@ -370,6 +371,7 @@ enum {
  35. NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
  36. NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
  37. NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
  38. + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
  39. };
  40. enum gro_result {
  41. --- a/net/core/dev.c
  42. +++ b/net/core/dev.c
  43. @@ -4304,6 +4304,8 @@ static inline void ____napi_schedule(str
  44. */
  45. thread = READ_ONCE(napi->thread);
  46. if (thread) {
  47. + if (thread->state != TASK_INTERRUPTIBLE)
  48. + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
  49. wake_up_process(thread);
  50. return;
  51. }
  52. @@ -6564,7 +6566,8 @@ bool napi_complete_done(struct napi_stru
  53. WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
  54. - new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
  55. + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
  56. + NAPIF_STATE_SCHED_THREADED);
  57. /* If STATE_MISSED was set, leave STATE_SCHED set,
  58. * because we will call napi->poll() one more time.
  59. @@ -7000,16 +7003,25 @@ static int napi_poll(struct napi_struct
  60. static int napi_thread_wait(struct napi_struct *napi)
  61. {
  62. + bool woken = false;
  63. +
  64. set_current_state(TASK_INTERRUPTIBLE);
  65. while (!kthread_should_stop() && !napi_disable_pending(napi)) {
  66. - if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
  67. + /* Testing SCHED_THREADED bit here to make sure the current
  68. + * kthread owns this napi and could poll on this napi.
  69. + * Testing SCHED bit is not enough because SCHED bit might be
  70. + * set by some other busy poll thread or by napi_disable().
  71. + */
  72. + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
  73. WARN_ON(!list_empty(&napi->poll_list));
  74. __set_current_state(TASK_RUNNING);
  75. return 0;
  76. }
  77. schedule();
  78. + /* woken being true indicates this thread owns this napi. */
  79. + woken = true;
  80. set_current_state(TASK_INTERRUPTIBLE);
  81. }
  82. __set_current_state(TASK_RUNNING);