0211-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch
  1. From 534c2338c3f72069910c06eba7693a4a1d15faf8 Mon Sep 17 00:00:00 2001
  2. From: Dave Hansen <[email protected]>
  3. Date: Mon, 4 Dec 2017 15:08:01 +0100
  4. Subject: [PATCH 211/242] x86/mm: Use INVPCID for __native_flush_tlb_single()
  5. MIME-Version: 1.0
  6. Content-Type: text/plain; charset=UTF-8
  7. Content-Transfer-Encoding: 8bit
  8. CVE-2017-5754
  9. This uses INVPCID to shoot down individual lines of the user mapping
  10. instead of marking the entire user map as invalid. This
  11. could/might/possibly be faster.
  12. This for sure needs tlb_single_page_flush_ceiling to be redetermined;
  13. esp. since INVPCID is _slow_.
  14. A detailed performance analysis is available here:
  15. https://lkml.kernel.org/r/[email protected]
  16. [ Peterz: Split out from big combo patch ]
  17. Signed-off-by: Dave Hansen <[email protected]>
  18. Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
  19. Signed-off-by: Thomas Gleixner <[email protected]>
  20. Cc: Andy Lutomirski <[email protected]>
  21. Cc: Boris Ostrovsky <[email protected]>
  22. Cc: Borislav Petkov <[email protected]>
  23. Cc: Brian Gerst <[email protected]>
  24. Cc: Denys Vlasenko <[email protected]>
  25. Cc: Eduardo Valentin <[email protected]>
  26. Cc: Greg KH <[email protected]>
  27. Cc: H. Peter Anvin <[email protected]>
  28. Cc: Josh Poimboeuf <[email protected]>
  29. Cc: Juergen Gross <[email protected]>
  30. Cc: Linus Torvalds <[email protected]>
  31. Cc: Peter Zijlstra <[email protected]>
  32. Cc: Will Deacon <[email protected]>
  33. Cc: [email protected]
  34. Cc: [email protected]
  35. Cc: [email protected]
  36. Cc: [email protected]
  37. Signed-off-by: Ingo Molnar <[email protected]>
  38. (cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1)
  39. Signed-off-by: Andy Whitcroft <[email protected]>
  40. Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
  41. (cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2)
  42. Signed-off-by: Fabian Grünbichler <[email protected]>
  43. ---
  44. arch/x86/include/asm/cpufeatures.h | 1 +
  45. arch/x86/include/asm/tlbflush.h | 23 +++++++++++++-
  46. arch/x86/mm/init.c | 64 ++++++++++++++++++++++----------------
  47. 3 files changed, 60 insertions(+), 28 deletions(-)
  48. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
  49. index de4e91452de4..9b0c283afcf0 100644
  50. --- a/arch/x86/include/asm/cpufeatures.h
  51. +++ b/arch/x86/include/asm/cpufeatures.h
  52. @@ -196,6 +196,7 @@
  53. #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
  54. #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
  55. #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
  56. +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
  57. #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
  58. #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
  59. diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
  60. index 2b7b32c243f1..979e590648a5 100644
  61. --- a/arch/x86/include/asm/tlbflush.h
  62. +++ b/arch/x86/include/asm/tlbflush.h
  63. @@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid)
  64. return asid + 1;
  65. }
  66. +/*
  67. + * The user PCID is just the kernel one, plus the "switch bit".
  68. + */
  69. +static inline u16 user_pcid(u16 asid)
  70. +{
  71. + u16 ret = kern_pcid(asid);
  72. +#ifdef CONFIG_PAGE_TABLE_ISOLATION
  73. + ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
  74. +#endif
  75. + return ret;
  76. +}
  77. +
  78. struct pgd_t;
  79. static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
  80. {
  81. @@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void)
  82. /*
  83. * Using INVPCID is considerably faster than a pair of writes
  84. * to CR4 sandwiched inside an IRQ flag save/restore.
  85. + *
  86. + * Note, this works with CR4.PCIDE=0 or 1.
  87. */
  88. invpcid_flush_all();
  89. return;
  90. @@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
  91. if (!static_cpu_has(X86_FEATURE_PTI))
  92. return;
  93. - invalidate_user_asid(loaded_mm_asid);
  94. + /*
  95. + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
  96. + * Just use invalidate_user_asid() in case we are called early.
  97. + */
  98. + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
  99. + invalidate_user_asid(loaded_mm_asid);
  100. + else
  101. + invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
  102. }
  103. /*
  104. diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
  105. index caeb8a7bf0a4..80259ad8c386 100644
  106. --- a/arch/x86/mm/init.c
  107. +++ b/arch/x86/mm/init.c
  108. @@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
  109. static void setup_pcid(void)
  110. {
  111. -#ifdef CONFIG_X86_64
  112. - if (boot_cpu_has(X86_FEATURE_PCID)) {
  113. - if (boot_cpu_has(X86_FEATURE_PGE)) {
  114. - /*
  115. - * This can't be cr4_set_bits_and_update_boot() --
  116. - * the trampoline code can't handle CR4.PCIDE and
  117. - * it wouldn't do any good anyway. Despite the name,
  118. - * cr4_set_bits_and_update_boot() doesn't actually
  119. - * cause the bits in question to remain set all the
  120. - * way through the secondary boot asm.
  121. - *
  122. - * Instead, we brute-force it and set CR4.PCIDE
  123. - * manually in start_secondary().
  124. - */
  125. - cr4_set_bits(X86_CR4_PCIDE);
  126. - } else {
  127. - /*
  128. - * flush_tlb_all(), as currently implemented, won't
  129. - * work if PCID is on but PGE is not. Since that
  130. - * combination doesn't exist on real hardware, there's
  131. - * no reason to try to fully support it, but it's
  132. - * polite to avoid corrupting data if we're on
  133. - * an improperly configured VM.
  134. - */
  135. - setup_clear_cpu_cap(X86_FEATURE_PCID);
  136. - }
  137. + if (!IS_ENABLED(CONFIG_X86_64))
  138. + return;
  139. +
  140. + if (!boot_cpu_has(X86_FEATURE_PCID))
  141. + return;
  142. +
  143. + if (boot_cpu_has(X86_FEATURE_PGE)) {
  144. + /*
  145. + * This can't be cr4_set_bits_and_update_boot() -- the
  146. + * trampoline code can't handle CR4.PCIDE and it wouldn't
  147. + * do any good anyway. Despite the name,
  148. + * cr4_set_bits_and_update_boot() doesn't actually cause
  149. + * the bits in question to remain set all the way through
  150. + * the secondary boot asm.
  151. + *
  152. + * Instead, we brute-force it and set CR4.PCIDE manually in
  153. + * start_secondary().
  154. + */
  155. + cr4_set_bits(X86_CR4_PCIDE);
  156. +
  157. + /*
  158. + * INVPCID's single-context modes (2/3) only work if we set
  159. + * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
  160. + * on systems that have X86_CR4_PCIDE clear, or that have
  161. + * no INVPCID support at all.
  162. + */
  163. + if (boot_cpu_has(X86_FEATURE_INVPCID))
  164. + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
  165. + } else {
  166. + /*
  167. + * flush_tlb_all(), as currently implemented, won't work if
  168. + * PCID is on but PGE is not. Since that combination
  169. + * doesn't exist on real hardware, there's no reason to try
  170. + * to fully support it, but it's polite to avoid corrupting
  171. + * data if we're on an improperly configured VM.
  172. + */
  173. + setup_clear_cpu_cap(X86_FEATURE_PCID);
  174. }
  175. -#endif
  176. }
  177. #ifdef CONFIG_X86_32
  178. --
  179. 2.14.2