0043-x86-mm-Factor-out-CR3-building-code.patch 6.5 KB

  1. From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
  2. From: Andy Lutomirski <[email protected]>
  3. Date: Sun, 17 Sep 2017 09:03:48 -0700
  4. Subject: [PATCH 043/242] x86/mm: Factor out CR3-building code
  5. MIME-Version: 1.0
  6. Content-Type: text/plain; charset=UTF-8
  7. Content-Transfer-Encoding: 8bit
  8. CVE-2017-5754
  9. Currently, the code that assembles a value to load into CR3 is
  10. open-coded everywhere. Factor it out into helpers build_cr3() and
  11. build_cr3_noflush().
  12. This makes one semantic change: __get_current_cr3_fast() was wrong
  13. on SME systems. No one noticed because the only caller is in the
  14. VMX code, and there are no CPUs with both SME and VMX.
  15. Signed-off-by: Andy Lutomirski <[email protected]>
  16. Cc: Borislav Petkov <[email protected]>
  17. Cc: Linus Torvalds <[email protected]>
  18. Cc: Peter Zijlstra <[email protected]>
  19. Cc: Thomas Gleixner <[email protected]>
  20. Cc: Tom Lendacky <[email protected]>
  21. Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
  22. Signed-off-by: Ingo Molnar <[email protected]>
  23. (backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
  24. Signed-off-by: Andy Whitcroft <[email protected]>
  25. Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
  26. (cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
  27. Signed-off-by: Fabian Grünbichler <[email protected]>
  28. ---
  29. arch/x86/include/asm/mmu_context.h | 15 ++++++---
  30. arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++---
  31. 2 files changed, 75 insertions(+), 8 deletions(-)
  32. diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
  33. index 7ae318c340d9..a999ba6b721f 100644
  34. --- a/arch/x86/include/asm/mmu_context.h
  35. +++ b/arch/x86/include/asm/mmu_context.h
  36. @@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  37. return __pkru_allows_pkey(vma_pkey(vma), write);
  38. }
  39. +static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
  40. +{
  41. + return __sme_pa(mm->pgd) | asid;
  42. +}
  43. +
  44. +static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  45. +{
  46. + return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
  47. +}
  48. /*
  49. * This can be used from process context to figure out what the value of
  50. @@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  51. */
  52. static inline unsigned long __get_current_cr3_fast(void)
  53. {
  54. - unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
  55. -
  56. - if (static_cpu_has(X86_FEATURE_PCID))
  57. - cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
  58. + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
  59. + this_cpu_read(cpu_tlbstate.loaded_mm_asid));
  60. /* For now, be very restrictive about when this can be called. */
  61. VM_WARN_ON(in_nmi() || preemptible());
  62. diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
  63. index 57943b4d8f2e..440400316c8a 100644
  64. --- a/arch/x86/mm/tlb.c
  65. +++ b/arch/x86/mm/tlb.c
  66. @@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
  67. * without going through leave_mm() / switch_mm_irqs_off() or that
  68. * does something like write_cr3(read_cr3_pa()).
  69. */
  70. - VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
  71. +#ifdef CONFIG_DEBUG_VM
  72. + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
  73. + /*
  74. + * If we were to BUG here, we'd be very likely to kill
  75. + * the system so hard that we don't see the call trace.
  76. + * Try to recover instead by ignoring the error and doing
  77. + * a global flush to minimize the chance of corruption.
  78. + *
  79. + * (This is far from being a fully correct recovery.
  80. + * Architecturally, the CPU could prefetch something
  81. + * back into an incorrect ASID slot and leave it there
  82. + * to cause trouble down the road. It's better than
  83. + * nothing, though.)
  84. + */
  85. + __flush_tlb_all();
  86. + }
  87. +#endif
  88. if (real_prev == next) {
  89. VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
  90. @@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
  91. */
  92. this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
  93. next_tlb_gen);
  94. - write_cr3(__pa(next->pgd) | prev_asid);
  95. + write_cr3(build_cr3(next, prev_asid));
  96. /*
  97. * This gets called via leave_mm() in the idle path
  98. @@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
  99. if (need_flush) {
  100. this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
  101. this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
  102. - write_cr3(__pa(next->pgd) | new_asid);
  103. + write_cr3(build_cr3(next, new_asid));
  104. trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
  105. TLB_FLUSH_ALL);
  106. } else {
  107. /* The new ASID is already up to date. */
  108. - write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
  109. + write_cr3(build_cr3_noflush(next, new_asid));
  110. trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
  111. }
  112. @@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
  113. switch_ldt(real_prev, next);
  114. }
  115. +/*
  116. + * Call this when reinitializing a CPU. It fixes the following potential
  117. + * problems:
  118. + *
  119. + * - The ASID changed from what cpu_tlbstate thinks it is (most likely
  120. + * because the CPU was taken down and came back up with CR3's PCID
  121. + * bits clear. CPU hotplug can do this.
  122. + *
  123. + * - The TLB contains junk in slots corresponding to inactive ASIDs.
  124. + *
  125. + * - The CPU went so far out to lunch that it may have missed a TLB
  126. + * flush.
  127. + */
  128. +void initialize_tlbstate_and_flush(void)
  129. +{
  130. + int i;
  131. + struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
  132. + u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
  133. + unsigned long cr3 = __read_cr3();
  134. +
  135. + /* Assert that CR3 already references the right mm. */
  136. + WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
  137. +
  138. + /*
  139. + * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
  140. + * doesn't work like other CR4 bits because it can only be set from
  141. + * long mode.)
  142. + */
  143. + WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
  144. + !(cr4_read_shadow() & X86_CR4_PCIDE));
  145. +
  146. + /* Force ASID 0 and force a TLB flush. */
  147. + write_cr3(build_cr3(mm, 0));
  148. +
  149. + /* Reinitialize tlbstate. */
  150. + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
  151. + this_cpu_write(cpu_tlbstate.next_asid, 1);
  152. + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
  153. + this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
  154. +
  155. + for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
  156. + this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
  157. +}
  158. +
  159. /*
  160. * flush_tlb_func_common()'s memory ordering requirement is that any
  161. * TLB fills that happen after we flush the TLB are ordered after we
  162. --
  163. 2.14.2