836-v6.7-leds-trigger-netdev-fix-RTNL-handling-to-prevent-pot.patch 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. From fe2b1226656afae56702d1d84c6900f6b67df297 Mon Sep 17 00:00:00 2001
  2. From: Heiner Kallweit <hkallweit1@gmail.com>
  3. Date: Fri, 1 Dec 2023 11:23:22 +0100
  4. Subject: [PATCH] leds: trigger: netdev: fix RTNL handling to prevent potential
  5. deadlock
  6. When working on LED support for r8169 I got the following lockdep
  7. warning. Easiest way to prevent this scenario seems to be to take
  8. the RTNL lock before the trigger_data lock in set_device_name().
  9. ======================================================
  10. WARNING: possible circular locking dependency detected
  11. 6.7.0-rc2-next-20231124+ #2 Not tainted
  12. ------------------------------------------------------
  13. bash/383 is trying to acquire lock:
  14. ffff888103aa1c68 (&trigger_data->lock){+.+.}-{3:3}, at: netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  15. but task is already holding lock:
  16. ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20
  17. which lock already depends on the new lock.
  18. the existing dependency chain (in reverse order) is:
  19. -> #1 (rtnl_mutex){+.+.}-{3:3}:
  20. __mutex_lock+0x9b/0xb50
  21. mutex_lock_nested+0x16/0x20
  22. rtnl_lock+0x12/0x20
  23. set_device_name+0xa9/0x120 [ledtrig_netdev]
  24. netdev_trig_activate+0x1a1/0x230 [ledtrig_netdev]
  25. led_trigger_set+0x172/0x2c0
  26. led_trigger_write+0xf1/0x140
  27. sysfs_kf_bin_write+0x5d/0x80
  28. kernfs_fop_write_iter+0x15d/0x210
  29. vfs_write+0x1f0/0x510
  30. ksys_write+0x6c/0xf0
  31. __x64_sys_write+0x14/0x20
  32. do_syscall_64+0x3f/0xf0
  33. entry_SYSCALL_64_after_hwframe+0x6c/0x74
  34. -> #0 (&trigger_data->lock){+.+.}-{3:3}:
  35. __lock_acquire+0x1459/0x25a0
  36. lock_acquire+0xc8/0x2d0
  37. __mutex_lock+0x9b/0xb50
  38. mutex_lock_nested+0x16/0x20
  39. netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  40. call_netdevice_register_net_notifiers+0x5a/0x100
  41. register_netdevice_notifier+0x85/0x120
  42. netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev]
  43. led_trigger_set+0x172/0x2c0
  44. led_trigger_write+0xf1/0x140
  45. sysfs_kf_bin_write+0x5d/0x80
  46. kernfs_fop_write_iter+0x15d/0x210
  47. vfs_write+0x1f0/0x510
  48. ksys_write+0x6c/0xf0
  49. __x64_sys_write+0x14/0x20
  50. do_syscall_64+0x3f/0xf0
  51. entry_SYSCALL_64_after_hwframe+0x6c/0x74
  52. other info that might help us debug this:
  53. Possible unsafe locking scenario:
  54.        CPU0                    CPU1
  55.        ----                    ----
  56.   lock(rtnl_mutex);
  57.                                lock(&trigger_data->lock);
  58.                                lock(rtnl_mutex);
  59.   lock(&trigger_data->lock);
  60. *** DEADLOCK ***
  61. 8 locks held by bash/383:
  62. #0: ffff888103ff33f0 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x6c/0xf0
  63. #1: ffff888103aa1e88 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x114/0x210
  64. #2: ffff8881036f1890 (kn->active#82){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x11d/0x210
  65. #3: ffff888108e2c358 (&led_cdev->led_access){+.+.}-{3:3}, at: led_trigger_write+0x30/0x140
  66. #4: ffffffff8cdd9e10 (triggers_list_lock){++++}-{3:3}, at: led_trigger_write+0x75/0x140
  67. #5: ffff888108e2c270 (&led_cdev->trigger_lock){++++}-{3:3}, at: led_trigger_write+0xe3/0x140
  68. #6: ffffffff8cdde3d0 (pernet_ops_rwsem){++++}-{3:3}, at: register_netdevice_notifier+0x1c/0x120
  69. #7: ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20
  70. stack backtrace:
  71. CPU: 0 PID: 383 Comm: bash Not tainted 6.7.0-rc2-next-20231124+ #2
  72. Hardware name: Default string Default string/Default string, BIOS ADLN.M6.SODIMM.ZB.CY.015 08/08/2023
  73. Call Trace:
  74. <TASK>
  75. dump_stack_lvl+0x5c/0xd0
  76. dump_stack+0x10/0x20
  77. print_circular_bug+0x2dd/0x410
  78. check_noncircular+0x131/0x150
  79. __lock_acquire+0x1459/0x25a0
  80. lock_acquire+0xc8/0x2d0
  81. ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  82. __mutex_lock+0x9b/0xb50
  83. ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  84. ? __this_cpu_preempt_check+0x13/0x20
  85. ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  86. ? __cancel_work_timer+0x11c/0x1b0
  87. ? __mutex_lock+0x123/0xb50
  88. mutex_lock_nested+0x16/0x20
  89. ? mutex_lock_nested+0x16/0x20
  90. netdev_trig_notify+0xec/0x190 [ledtrig_netdev]
  91. call_netdevice_register_net_notifiers+0x5a/0x100
  92. register_netdevice_notifier+0x85/0x120
  93. netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev]
  94. led_trigger_set+0x172/0x2c0
  95. ? preempt_count_add+0x49/0xc0
  96. led_trigger_write+0xf1/0x140
  97. sysfs_kf_bin_write+0x5d/0x80
  98. kernfs_fop_write_iter+0x15d/0x210
  99. vfs_write+0x1f0/0x510
  100. ksys_write+0x6c/0xf0
  101. __x64_sys_write+0x14/0x20
  102. do_syscall_64+0x3f/0xf0
  103. entry_SYSCALL_64_after_hwframe+0x6c/0x74
  104. RIP: 0033:0x7f269055d034
  105. Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d 35 c3 0d 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48
  106. RSP: 002b:00007ffddb7ef748 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
  107. RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 00007f269055d034
  108. RDX: 0000000000000007 RSI: 000055bf5f4af3c0 RDI: 0000000000000001
  109. RBP: 000055bf5f4af3c0 R08: 0000000000000073 R09: 0000000000000001
  110. R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000007
  111. R13: 00007f26906325c0 R14: 00007f269062ff20 R15: 0000000000000000
  112. </TASK>
  113. Fixes: d5e01266e7f5 ("leds: trigger: netdev: add additional specific link speed mode")
  114. Cc: stable@vger.kernel.org
  115. Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
  116. Reviewed-by: Andrew Lunn <andrew@lunn.ch>
  117. Acked-by: Lee Jones <lee@kernel.org>
  118. Link: https://lore.kernel.org/r/[email protected]
  119. Signed-off-by: Jakub Kicinski <[email protected]>
  120. ---
  121. drivers/leds/trigger/ledtrig-netdev.c | 11 +++++++----
  122. 1 file changed, 7 insertions(+), 4 deletions(-)
  123. --- a/drivers/leds/trigger/ledtrig-netdev.c
  124. +++ b/drivers/leds/trigger/ledtrig-netdev.c
  125. @@ -235,6 +235,11 @@ static int set_device_name(struct led_ne
  126. {
  127. cancel_delayed_work_sync(&trigger_data->work);
  128. + /*
  129. + * Take RTNL lock before trigger_data lock to prevent potential
  130. + * deadlock with netdev notifier registration.
  131. + */
  132. + rtnl_lock();
  133. mutex_lock(&trigger_data->lock);
  134. if (trigger_data->net_dev) {
  135. @@ -254,16 +259,14 @@ static int set_device_name(struct led_ne
  136. trigger_data->carrier_link_up = false;
  137. trigger_data->link_speed = SPEED_UNKNOWN;
  138. trigger_data->duplex = DUPLEX_UNKNOWN;
  139. - if (trigger_data->net_dev != NULL) {
  140. - rtnl_lock();
  141. + if (trigger_data->net_dev)
  142. get_device_state(trigger_data);
  143. - rtnl_unlock();
  144. - }
  145. trigger_data->last_activity = 0;
  146. set_baseline_state(trigger_data);
  147. mutex_unlock(&trigger_data->lock);
  148. + rtnl_unlock();
  149. return 0;
  150. }