0018-x86-entry-64-Refactor-IRQ-stacks-and-make-them-NMI-s.patch

From 2a767692d6140051e569ab59a1440b3760839e03 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <[email protected]>
Date: Tue, 11 Jul 2017 10:33:38 -0500
Subject: [PATCH 018/241] x86/entry/64: Refactor IRQ stacks and make them
 NMI-safe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: Andy Lutomirski <[email protected]>
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
  and ORC unwinder's lives a little easier. ]
Signed-off-by: Josh Poimboeuf <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Jiri Slaby <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <[email protected]>
(cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938)
Signed-off-by: Andy Whitcroft <[email protected]>
Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
(cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0)
Signed-off-by: Fabian Grünbichler <[email protected]>
---
 arch/x86/kernel/process_64.c |  3 ++
 arch/x86/Kconfig.debug       |  2 --
 arch/x86/entry/entry_64.S    | 85 +++++++++++++++++++++++++++++++-------------
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fe56e6f93cbb..1e7701c4cd80 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+		     this_cpu_read(irq_count) != -1);
+
 	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..1fc519f3c49e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
 	  Some of these sanity checks may slow down kernel entries and
 	  exits or otherwise impact performance.
 
-	  This is currently used to help test NMI code.
-
 	  If unsure, say N.
 
 config DEBUG_NMI_SELFTEST
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 6d078b89a5e8..07b4056af8a8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
 	.endr
 END(irq_entries_start)
 
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+	pushfq
+	testl	$X86_EFLAGS_IF, (%rsp)
+	jz	.Lokay_\@
+	ud2
+.Lokay_\@:
+	addq	$8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK old_rsp
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	movq	%rsp, \old_rsp
+	incl	PER_CPU_VAR(irq_count)
+
+	/*
+	 * Right now, if we just incremented irq_count to zero, we've
+	 * claimed the IRQ stack but we haven't switched to it yet.
+	 *
+	 * If anything is added that can interrupt us here without using IST,
+	 * it must be *extremely* careful to limit its stack usage. This
+	 * could include kprobes and a hypothetical future IST-less #DB
+	 * handler.
+	 */
+
+	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
+	pushq	\old_rsp
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK
+	DEBUG_ENTRY_ASSERT_IRQS_OFF
+	/* We need to be off the IRQ stack before decrementing irq_count. */
+	popq	%rsp
+
+	/*
+	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+	 * the irq stack but we're not on it.
+	 */
+
+	decl	PER_CPU_VAR(irq_count)
+.endm
+
 /*
  * Interrupt entry/exit.
  *
@@ -485,17 +538,7 @@ END(irq_entries_start)
 	CALL_enter_from_user_mode
 
 1:
-	/*
-	 * Save previous stack pointer, optionally switch to interrupt stack.
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-	movq	%rsp, %rdi
-	incl	PER_CPU_VAR(irq_count)
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rdi
+	ENTER_IRQ_STACK old_rsp=%rdi
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -515,10 +558,8 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
-	decl	PER_CPU_VAR(irq_count)
 
-	/* Restore saved previous stack */
-	popq	%rsp
+	LEAVE_IRQ_STACK
 
 	testb	$3, CS(%rsp)
 	jz	retint_kernel
@@ -892,12 +933,10 @@ bad_gs:
 ENTRY(do_softirq_own_stack)
 	pushq	%rbp
 	mov	%rsp, %rbp
-	incl	PER_CPU_VAR(irq_count)
-	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
-	push	%rbp				/* frame pointer backlink */
+	ENTER_IRQ_STACK old_rsp=%r11
 	call	__do_softirq
+	LEAVE_IRQ_STACK
 	leaveq
-	decl	PER_CPU_VAR(irq_count)
 	ret
 END(do_softirq_own_stack)
 
@@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
 	 * see the correct pointer to the pt_regs
 	 */
 	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
-11:	incl	PER_CPU_VAR(irq_count)
-	movq	%rsp, %rbp
-	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
-	pushq	%rbp				/* frame pointer backlink */
+
+	ENTER_IRQ_STACK old_rsp=%r10
 	call	xen_evtchn_do_upcall
-	popq	%rsp
-	decl	PER_CPU_VAR(irq_count)
+	LEAVE_IRQ_STACK
+
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
 #endif
-- 
2.14.2
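
Not part of the patch itself, but as an illustrative sketch of the protocol the new macros implement: the standalone C model below mimics the irq_count bookkeeping described in the macro comments above, assuming a single CPU and ignoring the real stack switch. The per-CPU counter starts at -1; the entry that raises it to 0 claims the IRQ stack and switches to it, while nested entries keep whatever stack is current. All identifiers in this sketch are hypothetical stand-ins, not kernel APIs.

/*
 * Illustrative model only: the irq_count protocol behind
 * ENTER_IRQ_STACK/LEAVE_IRQ_STACK. Not kernel code.
 */
#include <assert.h>
#include <stdio.h>

static int irq_count = -1;               /* per-CPU variable in the real kernel */
static const char *cur_stack = "task";   /* stand-in for %rsp                    */

static const char *enter_irq_stack(void)
{
	const char *old = cur_stack;     /* movq %rsp, \old_rsp                  */
	/* Increment first: from here on the IRQ stack counts as claimed. */
	if (++irq_count == 0)            /* incl PER_CPU_VAR(irq_count)          */
		cur_stack = "irq";       /* cmovzq irq_stack_ptr, %rsp           */
	return old;                      /* pushq \old_rsp                       */
}

static void leave_irq_stack(const char *old)
{
	cur_stack = old;                 /* popq %rsp: leave the IRQ stack first */
	irq_count--;                     /* ...then release the claim            */
}

int main(void)
{
	const char *outer = enter_irq_stack();   /* first entry: switches stacks */
	assert(irq_count == 0);
	const char *nested = enter_irq_stack();  /* nested entry: stays put      */
	assert(irq_count == 1);
	leave_irq_stack(nested);
	leave_irq_stack(outer);
	assert(irq_count == -1);                 /* invariant restored           */
	printf("back on %s stack, irq_count=%d\n", cur_stack, irq_count);
	return 0;
}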