| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 |
- From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
- From: Andy Lutomirski <[email protected]>
- Date: Sun, 17 Sep 2017 09:03:48 -0700
- Subject: [PATCH 043/242] x86/mm: Factor out CR3-building code
- MIME-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- CVE-2017-5754
- Currently, the code that assembles a value to load into CR3 is
- open-coded everywhere. Factor it out into helpers build_cr3() and
- build_cr3_noflush().
- This makes one semantic change: __get_current_cr3_fast() was wrong
- on SME systems. No one noticed because the only caller is in the
- VMX code, and there are no CPUs with both SME and VMX.
- Signed-off-by: Andy Lutomirski <[email protected]>
- Cc: Borislav Petkov <[email protected]>
- Cc: Linus Torvalds <[email protected]>
- Cc: Peter Zijlstra <[email protected]>
- Cc: Thomas Gleixner <[email protected]>
- Cc: Tom Lendacky <[email protected]>
- Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
- Signed-off-by: Ingo Molnar <[email protected]>
- (backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
- Signed-off-by: Andy Whitcroft <[email protected]>
- Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
- (cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
- Signed-off-by: Fabian Grünbichler <[email protected]>
- ---
- arch/x86/include/asm/mmu_context.h | 15 ++++++---
- arch/x86/mm/tlb.c | 68 +++++++++++++++++++++++++++++++++++---
- 2 files changed, 75 insertions(+), 8 deletions(-)
- diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
- index 7ae318c340d9..a999ba6b721f 100644
- --- a/arch/x86/include/asm/mmu_context.h
- +++ b/arch/x86/include/asm/mmu_context.h
- @@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
- return __pkru_allows_pkey(vma_pkey(vma), write);
- }
-
- +static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
- +{
- + return __sme_pa(mm->pgd) | asid;
- +}
- +
- +static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
- +{
- + return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
- +}
-
- /*
- * This can be used from process context to figure out what the value of
- @@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
- */
- static inline unsigned long __get_current_cr3_fast(void)
- {
- - unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
- -
- - if (static_cpu_has(X86_FEATURE_PCID))
- - cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
- + this_cpu_read(cpu_tlbstate.loaded_mm_asid));
-
- /* For now, be very restrictive about when this can be called. */
- VM_WARN_ON(in_nmi() || preemptible());
- diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
- index 57943b4d8f2e..440400316c8a 100644
- --- a/arch/x86/mm/tlb.c
- +++ b/arch/x86/mm/tlb.c
- @@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- * without going through leave_mm() / switch_mm_irqs_off() or that
- * does something like write_cr3(read_cr3_pa()).
- */
- - VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
- +#ifdef CONFIG_DEBUG_VM
- + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
- + /*
- + * If we were to BUG here, we'd be very likely to kill
- + * the system so hard that we don't see the call trace.
- + * Try to recover instead by ignoring the error and doing
- + * a global flush to minimize the chance of corruption.
- + *
- + * (This is far from being a fully correct recovery.
- + * Architecturally, the CPU could prefetch something
- + * back into an incorrect ASID slot and leave it there
- + * to cause trouble down the road. It's better than
- + * nothing, though.)
- + */
- + __flush_tlb_all();
- + }
- +#endif
-
- if (real_prev == next) {
- VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
- @@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- */
- this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
- next_tlb_gen);
- - write_cr3(__pa(next->pgd) | prev_asid);
- + write_cr3(build_cr3(next, prev_asid));
-
- /*
- * This gets called via leave_mm() in the idle path
- @@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- if (need_flush) {
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
- this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- - write_cr3(__pa(next->pgd) | new_asid);
- + write_cr3(build_cr3(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
- } else {
- /* The new ASID is already up to date. */
- - write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
- + write_cr3(build_cr3_noflush(next, new_asid));
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
- }
-
- @@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
- switch_ldt(real_prev, next);
- }
-
- +/*
- + * Call this when reinitializing a CPU. It fixes the following potential
- + * problems:
- + *
- + * - The ASID changed from what cpu_tlbstate thinks it is (most likely
- + * because the CPU was taken down and came back up with CR3's PCID
- + * bits clear). CPU hotplug can do this.
- + *
- + * - The TLB contains junk in slots corresponding to inactive ASIDs.
- + *
- + * - The CPU went so far out to lunch that it may have missed a TLB
- + * flush.
- + */
- +void initialize_tlbstate_and_flush(void)
- +{
- + int i;
- + struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
- + u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
- + unsigned long cr3 = __read_cr3();
- +
- + /* Assert that CR3 already references the right mm. */
- + WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
- +
- + /*
- + * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
- + * doesn't work like other CR4 bits because it can only be set from
- + * long mode.)
- + */
- + WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
- + !(cr4_read_shadow() & X86_CR4_PCIDE));
- +
- + /* Force ASID 0 and force a TLB flush. */
- + write_cr3(build_cr3(mm, 0));
- +
- + /* Reinitialize tlbstate. */
- + this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
- + this_cpu_write(cpu_tlbstate.next_asid, 1);
- + this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
- + this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
- +
- + for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
- + this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
- +}
- +
- /*
- * flush_tlb_func_common()'s memory ordering requirement is that any
- * TLB fills that happen after we flush the TLB are ordered after we
- --
- 2.14.2
|