100-dump_stack-avoid-potential-deadlocks.patch 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. From: Eric Dumazet <[email protected]>
  2. Date: Fri, 22 Jan 2016 23:06:44 -0800
  3. Subject: [PATCH] dump_stack: avoid potential deadlocks
  4. Some servers experienced fatal deadlocks because of a combination
  5. of bugs, leading to multiple cpus calling dump_stack().
  6. The checksumming bug was fixed in commit 34ae6a1aa054
  7. ("ipv6: update skb->csum when CE mark is propagated").
  8. The second problem is a faulty locking in dump_stack()
  9. CPU1 runs in process context and calls dump_stack(), grabs dump_lock.
  10. CPU2 receives a TCP packet under softirq, grabs socket spinlock, and
  11. call dump_stack() from netdev_rx_csum_fault().
  12. dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since
  13. dump_lock is owned by CPU1
  14. While dumping its stack, CPU1 is interrupted by a softirq, and happens
  15. to process a packet for the TCP socket locked by CPU2.
  16. CPU1 spins forever in spin_lock() : deadlock
  17. Stack trace on CPU1 looked like :
  18. [306295.402231] NMI backtrace for cpu 1
  19. [306295.402238] RIP: 0010:[<ffffffffa9804e95>] [<ffffffffa9804e95>] _raw_spin_lock+0x25/0x30
  20. ...
  21. [306295.402255] Stack:
  22. [306295.402256] ffff88407f023cb0 ffffffffa99cbdc3 ffff88407f023ca0 ffff88012f496bb0
  23. [306295.402266] ffffffffaa4dc1f0 ffff8820d94f0dc0 000000000000000a ffffffffaa4b4280
  24. [306295.402275] ffff88407f023ce0 ffffffffa98a21d0 ffff88407f023cc0 ffff88407f023ca0
  25. [306295.402284] Call Trace:
  26. [306295.402286] <IRQ>
  27. [306295.402288]
  28. [306295.402291] [<ffffffffa99cbdc3>] tcp_v6_rcv+0x243/0x620
  29. [306295.402304] [<ffffffffa99c38af>] ip6_input_finish+0x11f/0x330
  30. [306295.402309] [<ffffffffa99c3b38>] ip6_input+0x38/0x40
  31. [306295.402313] [<ffffffffa99c3b7c>] ip6_rcv_finish+0x3c/0x90
  32. [306295.402318] [<ffffffffa99c3e79>] ipv6_rcv+0x2a9/0x500
  33. [306295.402323] [<ffffffffa989a1a1>] process_backlog+0x461/0xaa0
  34. [306295.402332] [<ffffffffa9899a57>] net_rx_action+0x147/0x430
  35. [306295.402337] [<ffffffffa980cca7>] __do_softirq+0x167/0x2d0
  36. [306295.402341] [<ffffffffa9d912dc>] call_softirq+0x1c/0x30
  37. [306295.402345] [<ffffffffa980687f>] do_softirq+0x3f/0x80
  38. [306295.402350] [<ffffffffa980cf7e>] irq_exit+0x6e/0xc0
  39. [306295.402355] [<ffffffffa980a5b5>] smp_call_function_single_interrupt+0x35/0x40
  40. [306295.402360] [<ffffffffa9d90bfa>] call_function_single_interrupt+0x6a/0x70
  41. [306295.402361] <EOI>
  42. [306295.402364]
  43. [306295.402376] [<ffffffffa9a3fa32>] printk+0x4d/0x4f
  44. [306295.402390] [<ffffffffa99e1726>] printk_address+0x31/0x33
  45. [306295.402395] [<ffffffffa99e175b>] print_trace_address+0x33/0x3c
  46. [306295.402408] [<ffffffffa99e165b>] print_context_stack+0x7f/0x119
  47. [306295.402412] [<ffffffffa99e07f1>] dump_trace+0x26b/0x28e
  48. [306295.402417] [<ffffffffa99e17b3>] show_trace_log_lvl+0x4f/0x5c
  49. [306295.402421] [<ffffffffa99e0a14>] show_stack_log_lvl+0x104/0x113
  50. [306295.402425] [<ffffffffa99e1819>] show_stack+0x42/0x44
  51. [306295.402429] [<ffffffffa9ba350a>] dump_stack+0x46/0x58
  52. [306295.402434] [<ffffffffa9d0957d>] netdev_rx_csum_fault+0x38/0x3c
  53. [306295.402439] [<ffffffffa9988e0e>] __skb_checksum_complete_head+0x6e/0x80
  54. [306295.402444] [<ffffffffa9988e31>] __skb_checksum_complete+0x11/0x20
  55. [306295.402449] [<ffffffffa98acf65>] tcp_rcv_established+0x2bd5/0x2fd0
  56. [306295.402468] [<ffffffffa99cb69c>] tcp_v6_do_rcv+0x13c/0x620
  57. [306295.402477] [<ffffffffa9984be5>] sk_backlog_rcv+0x15/0x30
  58. [306295.402482] [<ffffffffa98931e2>] release_sock+0xd2/0x150
  59. [306295.402486] [<ffffffffa98a51b1>] tcp_recvmsg+0x1c1/0xfc0
  60. [306295.402491] [<ffffffffa98b637d>] inet_recvmsg+0x7d/0x90
  61. [306295.402495] [<ffffffffa9891fcf>] sock_recvmsg+0xaf/0xe0
  62. [306295.402505] [<ffffffffa9892611>] ___sys_recvmsg+0x111/0x3b0
  63. [306295.402528] [<ffffffffa9892f5c>] SyS_recvmsg+0x5c/0xb0
  64. [306295.402532] [<ffffffffa9d8fba2>] system_call_fastpath+0x16/0x1b
  65. Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()")
  66. Signed-off-by: Eric Dumazet <[email protected]>
  67. Cc: Alex Thorlton <[email protected]>
  68. ---
  69. --- a/lib/dump_stack.c
  70. +++ b/lib/dump_stack.c
  71. @@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(
  72. asmlinkage __visible void dump_stack(void)
  73. {
  74. + unsigned long flags;
  75. int was_locked;
  76. int old;
  77. int cpu;
  78. @@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(voi
  79. * Permit this cpu to perform nested stack dumps while serialising
  80. * against other CPUs
  81. */
  82. - preempt_disable();
  83. -
  84. retry:
  85. + local_irq_save(flags);
  86. cpu = smp_processor_id();
  87. old = atomic_cmpxchg(&dump_lock, -1, cpu);
  88. if (old == -1) {
  89. @@ -43,6 +43,7 @@ retry:
  90. } else if (old == cpu) {
  91. was_locked = 1;
  92. } else {
  93. + local_irq_restore(flags);
  94. cpu_relax();
  95. goto retry;
  96. }
  97. @@ -52,7 +53,7 @@ retry:
  98. if (!was_locked)
  99. atomic_set(&dump_lock, -1);
  100. - preempt_enable();
  101. + local_irq_restore(flags);
  102. }
  103. #else
  104. asmlinkage __visible void dump_stack(void)