020-v6.3-10-UPSTREAM-mm-add-vma_has_recency.patch

From 70d216c71ff5c5b17dd1da6294f97b91fb6aba7a Mon Sep 17 00:00:00 2001
From: Yu Zhao <[email protected]>
Date: Fri, 30 Dec 2022 14:52:51 -0700
Subject: [PATCH 10/19] UPSTREAM: mm: add vma_has_recency()

Add vma_has_recency() to indicate whether a VMA may exhibit temporal
locality that the LRU algorithm relies on.

This function returns false for VMAs marked by VM_SEQ_READ or
VM_RAND_READ. While the former flag indicates linear access, i.e., a
special case of spatial locality, both flags indicate a lack of temporal
locality, i.e., the reuse of an area within a relatively small duration.
"Recency" is chosen over "locality" to avoid confusion between temporal
and spatial localities.
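
For background: userspace typically sets these flags through
madvise(2), where MADV_SEQUENTIAL sets VM_SEQ_READ and MADV_RANDOM sets
VM_RAND_READ on the mapped range. A minimal userspace sketch (the path
and mapping length are illustrative):

  #include <fcntl.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
  	int fd = open("/mnt/data", O_RDONLY);	/* illustrative path */
  	size_t len = 1UL << 20;			/* map 1 MiB */
  	void *p;

  	if (fd < 0)
  		return 1;
  	p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
  	if (p == MAP_FAILED)
  		return 1;

  	/*
  	 * Sets VM_RAND_READ on this VMA: vma_has_recency() then returns
  	 * false and the LRU ignores the accessed bit for its pages.
  	 */
  	madvise(p, len, MADV_RANDOM);

  	/* ... random reads through p ... */
  	munmap(p, len);
  	close(fd);
  	return 0;
  }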

Before this patch, the active/inactive LRU only ignored the accessed bit
from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive
LRU and MGLRU share the same logic: they both ignore the accessed bit if
vma_has_recency() returns false.
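
As a reading aid for that shared predicate, the helper can be modeled
in a self-contained userspace program (the flag values mirror
include/linux/mm.h, and the one-field vm_area_struct is a mock):

  #include <stdbool.h>
  #include <stdio.h>

  #define VM_SEQ_READ	0x00008000UL
  #define VM_RAND_READ	0x00010000UL

  struct vm_area_struct { unsigned long vm_flags; };

  /* Same test as the helper added to include/linux/mm_inline.h below. */
  static bool vma_has_recency(struct vm_area_struct *vma)
  {
  	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
  		return false;

  	return true;
  }

  int main(void)
  {
  	struct vm_area_struct plain = { .vm_flags = 0 };
  	struct vm_area_struct seq = { .vm_flags = VM_SEQ_READ };
  	struct vm_area_struct rnd = { .vm_flags = VM_RAND_READ };

  	/* Prints "1 0 0": only the unhinted VMA retains recency. */
  	printf("%d %d %d\n", vma_has_recency(&plain),
  	       vma_has_recency(&seq), vma_has_recency(&rnd));
  	return 0;
  }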

For the active/inactive LRU, the following fio test showed a [6, 8]%
increase in IOPS when randomly accessing mapped files under memory
pressure.

  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
  kb=$((kb - 8*1024*1024))

  modprobe brd rd_nr=1 rd_size=$kb
  dd if=/dev/zero of=/dev/ram0 bs=1M
  mkfs.ext4 /dev/ram0
  mount /dev/ram0 /mnt/
  swapoff -a

  fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \
      --size=8G --rw=randrw --time_based --runtime=10m \
      --group_reporting

The discussion that led to this patch is here [1]. Additional test
results are available in that thread.

[1] https://lore.kernel.org/r/Y31s%[email protected]/

Link: https://lkml.kernel.org/r/[email protected]
Change-Id: I291dcb795197659e40e46539cd32b857677c34ad
Signed-off-by: Yu Zhao <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Andrea Righi <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michael Larabel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>

(cherry picked from commit 8788f6781486769d9598dcaedc3fe0eb12fc3e59)
Bug: 274865848
Signed-off-by: T.J. Mercier <[email protected]>
---
 include/linux/mm_inline.h |  8 ++++++++
 mm/memory.c               |  7 +++----
 mm/rmap.c                 | 42 +++++++++++++++++----------------------
 mm/vmscan.c               |  5 ++++-
 4 files changed, 33 insertions(+), 29 deletions(-)

--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -595,4 +595,12 @@ pte_install_uffd_wp_if_needed(struct vm_
 #endif
 }
 
+static inline bool vma_has_recency(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+		return false;
+
+	return true;
+}
+
 #endif
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1445,8 +1445,7 @@ again:
					force_flush = 1;
					set_page_dirty(page);
				}
-				if (pte_young(ptent) &&
-				    likely(!(vma->vm_flags & VM_SEQ_READ)))
+				if (pte_young(ptent) && likely(vma_has_recency(vma)))
					mark_page_accessed(page);
			}
			rss[mm_counter(page)]--;
@@ -5199,8 +5198,8 @@ static inline void mm_account_fault(stru
 #ifdef CONFIG_LRU_GEN
 static void lru_gen_enter_fault(struct vm_area_struct *vma)
 {
-	/* the LRU algorithm doesn't apply to sequential or random reads */
-	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+	/* the LRU algorithm only applies to accesses with recency */
+	current->in_lru_fault = vma_has_recency(vma);
 }
 
 static void lru_gen_exit_fault(void)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct
		}
 
		if (pvmw.pte) {
-			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
-			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+			if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
				lru_gen_look_around(&pvmw);
				referenced++;
			}
 
			if (ptep_clear_flush_young_notify(vma, address,
-						pvmw.pte)) {
-				/*
-				 * Don't treat a reference through
-				 * a sequentially read mapping as such.
-				 * If the folio has been used in another mapping,
-				 * we will catch it; if this other mapping is
-				 * already gone, the unmap path will have set
-				 * the referenced flag or activated the folio.
-				 */
-				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-					referenced++;
-			}
+						pvmw.pte))
+				referenced++;
		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			if (pmdp_clear_flush_young_notify(vma, address,
						pvmw.pmd))
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma
	struct folio_referenced_arg *pra = arg;
	struct mem_cgroup *memcg = pra->memcg;
 
-	if (!mm_match_cgroup(vma->vm_mm, memcg))
+	/*
+	 * Ignore references from this mapping if it has no recency. If the
+	 * folio has been used in another mapping, we will catch it; if this
+	 * other mapping is already gone, the unmap path will have set the
+	 * referenced flag or activated the folio in zap_pte_range().
+	 */
+	if (!vma_has_recency(vma))
+		return true;
+
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
+	 * of references from different cgroups.
+	 */
+	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
		return true;
 
	return false;
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio
		.arg = (void *)&pra,
		.anon_lock = folio_lock_anon_vma_read,
		.try_lock = true,
+		.invalid_vma = invalid_folio_referenced_vma,
	};
 
	*vm_flags = 0;
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio
		return 1;
	}
 
-	/*
-	 * If we are reclaiming on behalf of a cgroup, skip
-	 * counting on behalf of references from different
-	 * cgroups
-	 */
-	if (memcg) {
-		rwc.invalid_vma = invalid_folio_referenced_vma;
-	}
-
	rmap_walk(folio, &rwc);
	*vm_flags = pra.vm_flags;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3778,7 +3778,10 @@ static int should_skip_vma(unsigned long
	if (is_vm_hugetlb_page(vma))
		return true;
 
-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
+	if (!vma_has_recency(vma))
+		return true;
+
+	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
		return true;
 
	if (vma == get_gate_vma(vma->vm_mm))