0007-Revert-Merge-branch-mmu_notifier_fixes.patch

From dfd4ec1fd8d1d09930e9cf9ed7ebd07a66813337 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <[email protected]>
Date: Wed, 29 Nov 2017 09:45:44 +0100
Subject: [PATCH 7/7] Revert "Merge branch 'mmu_notifier_fixes'"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit ea25c43179462e342d4a0e66c3f6a5f53514da05, reversing
changes made to c227390c91a355300f47f9bef0aefbdfaaca1500.

This series causes blue screens in Windows VMs running under heavy
memory/swap pressure.

Signed-off-by: Fabian Grünbichler <[email protected]>
---
arch/arm/include/asm/kvm_host.h | 6 +++++
arch/arm64/include/asm/kvm_host.h | 6 +++++
arch/mips/include/asm/kvm_host.h | 5 ++++
arch/powerpc/include/asm/kvm_host.h | 5 ++++
arch/x86/include/asm/kvm_host.h | 2 ++
include/linux/mm.h | 1 -
include/linux/mmu_notifier.h | 25 +++++++++++++++++++
arch/powerpc/platforms/powernv/npu-dma.c | 10 ++++++++
arch/x86/kvm/x86.c | 11 +++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 31 +++++++++++++++++++++++
drivers/infiniband/core/umem_odp.c | 19 +++++++++++++++
drivers/infiniband/hw/hfi1/mmu_rb.c | 9 +++++++
drivers/iommu/amd_iommu_v2.c | 8 ++++++
drivers/iommu/intel-svm.c | 9 +++++++
drivers/misc/mic/scif/scif_dma.c | 11 +++++++++
drivers/misc/sgi-gru/grutlbpurge.c | 12 +++++++++
drivers/xen/gntdev.c | 8 ++++++
fs/dax.c | 19 ++++++---------
mm/memory.c | 26 ++++----------------
mm/mmu_notifier.c | 14 +++++++++++
mm/rmap.c | 35 +++-----------------------
virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++
22 files changed, 249 insertions(+), 65 deletions(-)
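
Note (commentary, not part of the patch): the diff below reinstates the single-page
->invalidate_page() mmu_notifier callback that the reverted series had removed. As a
rough illustration of how a driver consumes that callback once this revert is applied,
a minimal sketch follows. The demo_* names are made up; only mmu_notifier_register()
and the restored callback signature are assumed.

/*
 * Illustrative sketch only: a minimal notifier relying on the
 * ->invalidate_page() callback restored by this revert.
 */
#include <linux/mmu_notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

struct demo_notifier {
	struct mmu_notifier mn;
};

static void demo_invalidate_page(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long address)
{
	/*
	 * The page backing 'address' is still allocated while this runs,
	 * so a secondary MMU can be flushed here before the page is freed.
	 */
	pr_debug("invalidate_page: mm=%p addr=%#lx\n", mm, address);
}

static const struct mmu_notifier_ops demo_ops = {
	.invalidate_page = demo_invalidate_page,
};

/*
 * Registers against the caller's mm; mmu_notifier_register() takes
 * mmap_sem internally, so it must not already be held here.
 */
static int demo_register(struct demo_notifier *dn)
{
	dn->mn.ops = &demo_ops;
	return mmu_notifier_register(&dn->mn, current->mm);
}
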
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 4a879f6ff13b..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,6 +225,12 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+/* We do not have shadow page tables, hence the empty hooks */
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e923b58606e2..d68630007b14 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -326,6 +326,12 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+/* We do not have shadow page tables, hence the empty hooks */
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a9af1d2dcd69..2998479fd4e8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -938,6 +938,11 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
/* Emulation */
int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e372ed871c51..8b3f1238d07f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -67,6 +67,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 92c9032502d8..f4d120a3e22e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1375,6 +1375,8 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 07630442bbf2..701de4b55ece 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1260,7 +1260,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
- unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 6866e8126982..947f21b451d2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -94,6 +94,17 @@ struct mmu_notifier_ops {
unsigned long address,
pte_t pte);
+ /*
+ * Before this is invoked any secondary MMU is still ok to
+ * read/write to the page previously pointed to by the Linux
+ * pte because the page hasn't been freed yet and it won't be
+ * freed until this returns. If required set_page_dirty has to
+ * be called internally to this method.
+ */
+ void (*invalidate_page)(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address);
+
/*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_sem and/or the
@@ -209,6 +220,8 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
+extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+ unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -255,6 +268,13 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
__mmu_notifier_change_pte(mm, address, pte);
}
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+ unsigned long address)
+{
+ if (mm_has_notifiers(mm))
+ __mmu_notifier_invalidate_page(mm, address);
+}
+
static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
@@ -427,6 +447,11 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
{
}
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+ unsigned long address)
+{
+}
+
static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..3d4f879e687c 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -614,6 +614,15 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
mmio_invalidate(npu_context, 1, address, true);
}
+static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct npu_context *npu_context = mn_to_npu_context(mn);
+
+ mmio_invalidate(npu_context, 1, address, true);
+}
+
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -631,6 +640,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
.release = pnv_npu2_mn_release,
.change_pte = pnv_npu2_mn_change_pte,
+ .invalidate_page = pnv_npu2_mn_invalidate_page,
.invalidate_range = pnv_npu2_mn_invalidate_range,
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7351cdc46cc7..a669b4dd51e7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6734,6 +6734,17 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e1cde6b80027..6558a3ed57a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -146,6 +146,36 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
}
+/**
+ * amdgpu_mn_invalidate_page - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @address: address of invalidate page
+ *
+ * Invalidation of a single page. Blocks for all BOs mapping it
+ * and unmap them by move them into system domain again.
+ */
+static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct interval_tree_node *it;
+
+ mutex_lock(&rmn->lock);
+
+ it = interval_tree_iter_first(&rmn->objects, address, address);
+ if (it) {
+ struct amdgpu_mn_node *node;
+
+ node = container_of(it, struct amdgpu_mn_node, it);
+ amdgpu_mn_invalidate_node(node, address, address);
+ }
+
+ mutex_unlock(&rmn->lock);
+}
+
/**
* amdgpu_mn_invalidate_range_start - callback to notify about mm change
*
@@ -185,6 +215,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
static const struct mmu_notifier_ops amdgpu_mn_ops = {
.release = amdgpu_mn_release,
+ .invalidate_page = amdgpu_mn_invalidate_page,
.invalidate_range_start = amdgpu_mn_invalidate_range_start,
};
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 55e8f5ed8b3c..8c4ec564e495 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -166,6 +166,24 @@ static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
return 0;
}
+static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+ if (!context->invalidate_range)
+ return;
+
+ ib_ucontext_notifier_start_account(context);
+ down_read(&context->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
+ address + PAGE_SIZE,
+ invalidate_page_trampoline, NULL);
+ up_read(&context->umem_rwsem);
+ ib_ucontext_notifier_end_account(context);
+}
+
static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
u64 end, void *cookie)
{
@@ -219,6 +237,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
static const struct mmu_notifier_ops ib_umem_notifiers = {
.release = ib_umem_notifier_release,
+ .invalidate_page = ib_umem_notifier_invalidate_page,
.invalidate_range_start = ib_umem_notifier_invalidate_range_start,
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index e4b56a0dd6d0..ccbf52c8ff6f 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,6 +67,8 @@ struct mmu_rb_handler {
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+ unsigned long);
static inline void mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *,
unsigned long, unsigned long);
@@ -80,6 +82,7 @@ static void do_remove(struct mmu_rb_handler *handler,
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
+ .invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
@@ -282,6 +285,12 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
handler->ops->remove(handler->ops_arg, node);
}
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+ struct mm_struct *mm, unsigned long addr)
+{
+ mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
+}
+
static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index dccf5b76eff2..6629c472eafd 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -391,6 +391,13 @@ static int mn_clear_flush_young(struct mmu_notifier *mn,
return 0;
}
+static void mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ __mn_flush_page(mn, address);
+}
+
static void mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -429,6 +436,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops iommu_mn = {
.release = mn_release,
.clear_flush_young = mn_clear_flush_young,
+ .invalidate_page = mn_invalidate_page,
.invalidate_range = mn_invalidate_range,
};
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f620dccec8ee..f167c0d84ebf 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -223,6 +223,14 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
intel_flush_svm_range(svm, address, 1, 1, 0);
}
+static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ intel_flush_svm_range(svm, address, 1, 1, 0);
+}
+
/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
@@ -277,6 +285,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
.change_pte = intel_change_pte,
+ .invalidate_page = intel_invalidate_page,
.invalidate_range = intel_invalidate_range,
};
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 63d6246d6dff..64d5760d069a 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,6 +200,16 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
schedule_work(&scif_info.misc_work);
}
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
@@ -225,6 +235,7 @@ static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
.release = scif_mmu_notifier_release,
.clear_flush_young = NULL,
+ .invalidate_page = scif_mmu_notifier_invalidate_page,
.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 9918eda0e05f..e936d43895d2 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -247,6 +247,17 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
}
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address)
+{
+ struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+ ms_notifier);
+
+ STAT(mmu_invalidate_page);
+ gru_flush_tlb_range(gms, address, PAGE_SIZE);
+ gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+
static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
@@ -258,6 +269,7 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops gru_mmuops = {
+ .invalidate_page = gru_invalidate_page,
.invalidate_range_start = gru_invalidate_range_start,
.invalidate_range_end = gru_invalidate_range_end,
.release = gru_release,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 82360594fa8e..f3bf8f4e2d6c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -484,6 +484,13 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
mutex_unlock(&priv->lock);
}
+static void mn_invl_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ mn_invl_range_start(mn, mm, address, address + PAGE_SIZE);
+}
+
static void mn_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
@@ -515,6 +522,7 @@ static void mn_release(struct mmu_notifier *mn,
static const struct mmu_notifier_ops gntdev_mmu_ops = {
.release = mn_release,
+ .invalidate_page = mn_invl_page,
.invalidate_range_start = mn_invl_range_start,
};
diff --git a/fs/dax.c b/fs/dax.c
index fa8e358c3c6b..57da1d0a6a40 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -591,10 +591,11 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pte_t pte, *ptep = NULL;
pmd_t *pmdp = NULL;
spinlock_t *ptl;
+ bool changed;
i_mmap_lock_read(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
- unsigned long address, start, end;
+ unsigned long address;
cond_resched();
@@ -602,13 +603,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
continue;
address = pgoff_address(index, vma);
-
- /*
- * Note because we provide start/end to follow_pte_pmd it will
- * call mmu_notifier_invalidate_range_start() on our behalf
- * before taking any lock.
- */
- if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
+ changed = false;
+ if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
continue;
if (pmdp) {
@@ -625,7 +621,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
- mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+ changed = true;
unlock_pmd:
spin_unlock(ptl);
#endif
@@ -640,12 +636,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
pte = pte_wrprotect(pte);
pte = pte_mkclean(pte);
set_pte_at(vma->vm_mm, address, ptep, pte);
- mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+ changed = true;
unlock_pte:
pte_unmap_unlock(ptep, ptl);
}
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ if (changed)
+ mmu_notifier_invalidate_page(vma->vm_mm, address);
}
i_mmap_unlock_read(mapping);
}
diff --git a/mm/memory.c b/mm/memory.c
index 969c5bf31997..7834310a6b64 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4044,8 +4044,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
#endif /* __PAGETABLE_PMD_FOLDED */
static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
- unsigned long *start, unsigned long *end,
- pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -4072,29 +4071,17 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
if (!pmdpp)
goto out;
- if (start && end) {
- *start = address & PMD_MASK;
- *end = *start + PMD_SIZE;
- mmu_notifier_invalidate_range_start(mm, *start, *end);
- }
*ptlp = pmd_lock(mm, pmd);
if (pmd_huge(*pmd)) {
*pmdpp = pmd;
return 0;
}
spin_unlock(*ptlp);
- if (start && end)
- mmu_notifier_invalidate_range_end(mm, *start, *end);
}
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
goto out;
- if (start && end) {
- *start = address & PAGE_MASK;
- *end = *start + PAGE_SIZE;
- mmu_notifier_invalidate_range_start(mm, *start, *end);
- }
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
if (!pte_present(*ptep))
goto unlock;
@@ -4102,8 +4089,6 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
return 0;
unlock:
pte_unmap_unlock(ptep, *ptlp);
- if (start && end)
- mmu_notifier_invalidate_range_end(mm, *start, *end);
out:
return -EINVAL;
}
@@ -4115,21 +4100,20 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte_pmd(mm, address, NULL, NULL,
- ptepp, NULL, ptlp)));
+ !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+ ptlp)));
return res;
}
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
- unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
int res;
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte_pmd(mm, address, start, end,
- ptepp, pmdpp, ptlp)));
+ !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+ ptlp)));
return res;
}
EXPORT_SYMBOL(follow_pte_pmd);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 314285284e6e..54ca54562928 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,6 +174,20 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
srcu_read_unlock(&srcu, id);
}
+void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct mmu_notifier *mn;
+ int id;
+
+ id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+ if (mn->ops->invalidate_page)
+ mn->ops->invalidate_page(mn, mm, address);
+ }
+ srcu_read_unlock(&srcu, id);
+}
+
void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
diff --git a/mm/rmap.c b/mm/rmap.c
index c570f82e6827..c8993c63eb25 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -887,21 +887,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
.address = address,
.flags = PVMW_SYNC,
};
- unsigned long start = address, end;
int *cleaned = arg;
- /*
- * We have to assume the worse case ie pmd for invalidation. Note that
- * the page can not be free from this function.
- */
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
-
while (page_vma_mapped_walk(&pvmw)) {
- unsigned long cstart, cend;
int ret = 0;
-
- cstart = address = pvmw.address;
+ address = pvmw.address;
if (pvmw.pte) {
pte_t entry;
pte_t *pte = pvmw.pte;
@@ -914,7 +904,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
set_pte_at(vma->vm_mm, address, pte, entry);
- cend = cstart + PAGE_SIZE;
ret = 1;
} else {
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -929,8 +918,6 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
- cstart &= PMD_MASK;
- cend = cstart + PMD_SIZE;
ret = 1;
#else
/* unexpected pmd-mapped page? */
@@ -939,13 +926,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
}
if (ret) {
- mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend);
+ mmu_notifier_invalidate_page(vma->vm_mm, address);
(*cleaned)++;
}
}
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
-
return true;
}
@@ -1339,7 +1324,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pte_t pteval;
struct page *subpage;
bool ret = true;
- unsigned long start = address, end;
enum ttu_flags flags = (enum ttu_flags)arg;
/* munlock has nothing to gain from examining un-locked vmas */
@@ -1351,14 +1335,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
flags & TTU_MIGRATION, page);
}
- /*
- * We have to assume the worse case ie pmd for invalidation. Note that
- * the page can not be free in this function as call of try_to_unmap()
- * must hold a reference on the page.
- */
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
-
while (page_vma_mapped_walk(&pvmw)) {
/*
* If the page is mlock()d, we cannot swap it out.
@@ -1469,7 +1445,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
WARN_ON_ONCE(1);
ret = false;
- /* We have to invalidate as we cleared the pte */
page_vma_mapped_walk_done(&pvmw);
break;
}
@@ -1515,12 +1490,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
discard:
page_remove_rmap(subpage, PageHuge(page));
put_page(page);
- mmu_notifier_invalidate_range(mm, address,
- address + PAGE_SIZE);
+ mmu_notifier_invalidate_page(mm, address);
}
-
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
-
return ret;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bfa9c4d34102..1d048ef969a8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -322,6 +322,47 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
return container_of(mn, struct kvm, mmu_notifier);
}
+static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int need_tlb_flush, idx;
+
+ /*
+ * When ->invalidate_page runs, the linux pte has been zapped
+ * already but the page is still allocated until
+ * ->invalidate_page returns. So if we increase the sequence
+ * here the kvm page fault will notice if the spte can't be
+ * established because the page is going to be freed. If
+ * instead the kvm page fault establishes the spte before
+ * ->invalidate_page runs, kvm_unmap_hva will release it
+ * before returning.
+ *
+ * The sequence increase only need to be seen at spin_unlock
+ * time, and not at spin_lock time.
+ *
+ * Increasing the sequence after the spin_unlock would be
+ * unsafe because the kvm page fault could then establish the
+ * pte after kvm_unmap_hva returned, without noticing the page
+ * is going to be freed.
+ */
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ kvm->mmu_notifier_seq++;
+ need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
+ /* we've to flush the tlb before the pages can be freed */
+ if (need_tlb_flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
@@ -469,6 +510,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+ .invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
--
2.14.2
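
Note (commentary, not part of the patch): the long comment in the kvm_main.c hunk above
describes why kvm->mmu_notifier_seq is bumped before kvm_unmap_hva(). The consumer side
of that protocol, in a KVM page-fault path, looks roughly like the simplified sketch
below. demo_map_gfn() is a made-up name, error-pfn checks and the actual spte
installation are omitted, and only gfn_to_pfn(), mmu_notifier_retry() and
kvm_release_pfn_clean() are assumed from the KVM API.

/*
 * Illustrative sketch: detect a concurrent invalidation (such as
 * ->invalidate_page) via kvm->mmu_notifier_seq and retry instead of
 * installing a stale translation.
 */
#include <linux/kvm_host.h>

static int demo_map_gfn(struct kvm *kvm, gfn_t gfn)
{
	unsigned long mmu_seq;
	kvm_pfn_t pfn;

	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();			/* sample the sequence before faulting */

	pfn = gfn_to_pfn(kvm, gfn);	/* may sleep while faulting the page in */

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq)) {
		/*
		 * An invalidation ran since mmu_seq was sampled: drop the
		 * pfn and let the caller retry the fault.
		 */
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return -EAGAIN;
	}
	/* ... install the gfn -> pfn translation (spte) here ... */
	spin_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}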