From 7c5d42f31bf68647dd00ac2fef9057d113e8072d Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <[email protected]>
Date: Sat, 9 Sep 2017 00:56:03 +0300
Subject: [PATCH 050/242] mm, x86/mm: Fix performance regression in
 get_user_pages_fast()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop

The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
The x86 implementation used local_irq_disable() + local_irq_enable() instead.

The fix is to make get_user_pages_fast() use local_irq_disable(), leaving
local_irq_save() for __get_user_pages_fast(), which can be called with
interrupts already disabled.

Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:

  Before: Average: 14.91 ms, stddev: 0.45 ms
  After:  Average: 10.76 ms, stddev: 0.18 ms
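The practical difference between the two idioms, sketched below, is that the
nested form has to capture and later restore the caller's interrupt state,
which on x86 means reading and writing the flags register, whereas the
non-nested form is a plain disable/enable pair. The sketch paraphrases the
code added further down in this patch; the *_sketch() names are illustrative
only, and the bounds check and the slow-path fallback are omitted:

  /*
   * IRQ-safe variant: may be entered with interrupts already disabled,
   * so it must save and later restore the caller's IRQ state with the
   * more expensive local_irq_save()/local_irq_restore() pair.
   */
  static int gup_fast_irqsafe_sketch(unsigned long addr, unsigned long end,
                                     int write, struct page **pages)
  {
          unsigned long flags;
          int nr = 0;

          local_irq_save(flags);
          gup_pgd_range(addr, end, write, pages, &nr);
          local_irq_restore(flags);
          return nr;
  }

  /*
   * Regular variant: only ever called with interrupts enabled, so the
   * cheaper unconditional local_irq_disable()/local_irq_enable() pair
   * is enough.
   */
  static int gup_fast_sketch(unsigned long addr, unsigned long end,
                             int write, struct page **pages)
  {
          int nr = 0;

          local_irq_disable();
          gup_pgd_range(addr, end, write, pages, &nr);
          local_irq_enable();
          return nr;
  }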
Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Thorsten Leemhuis <[email protected]>
Cc: [email protected]
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
(cherry picked from commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80)
Signed-off-by: Andy Whitcroft <[email protected]>
Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
(cherry picked from commit 5241f4b2c68284612e34910305f3234e4a64701b)
Signed-off-by: Fabian Grünbichler <[email protected]>
---
 mm/gup.c | 97 ++++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 23f01c40c88f..4a789f1c6a27 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
         return 1;
 }
 
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        unsigned long next;
+        pgd_t *pgdp;
+
+        pgdp = pgd_offset(current->mm, addr);
+        do {
+                pgd_t pgd = READ_ONCE(*pgdp);
+
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        return;
+                if (unlikely(pgd_huge(pgd))) {
+                        if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+                                          pages, nr))
+                                return;
+                } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+                        if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+                                         PGDIR_SHIFT, next, write, pages, nr))
+                                return;
+                } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+                        return;
+        } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+        unsigned long len, end;
+
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                           struct page **pages)
 {
-        struct mm_struct *mm = current->mm;
         unsigned long addr, len, end;
-        unsigned long next, flags;
-        pgd_t *pgdp;
+        unsigned long flags;
         int nr = 0;
 
         start &= PAGE_MASK;
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
          * block IPIs that come from THPs splitting.
          */
 
-        local_irq_save(flags);
-        pgdp = pgd_offset(mm, addr);
-        do {
-                pgd_t pgd = READ_ONCE(*pgdp);
-
-                next = pgd_addr_end(addr, end);
-                if (pgd_none(pgd))
-                        break;
-                if (unlikely(pgd_huge(pgd))) {
-                        if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-                                          pages, &nr))
-                                break;
-                } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-                        if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-                                         PGDIR_SHIFT, next, write, pages, &nr))
-                                break;
-                } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-                        break;
-        } while (pgdp++, addr = next, addr != end);
-        local_irq_restore(flags);
+        if (gup_fast_permitted(start, nr_pages, write)) {
+                local_irq_save(flags);
+                gup_pgd_range(addr, end, write, pages, &nr);
+                local_irq_restore(flags);
+        }
 
         return nr;
 }
 
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-        unsigned long len, end;
-
-        len = (unsigned long) nr_pages << PAGE_SHIFT;
-        end = start + len;
-        return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:      starting user address
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                         struct page **pages)
 {
+        unsigned long addr, len, end;
         int nr = 0, ret = 0;
 
         start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+
+        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                        (void __user *)start, len)))
+                return 0;
 
         if (gup_fast_permitted(start, nr_pages, write)) {
-                nr = __get_user_pages_fast(start, nr_pages, write, pages);
+                local_irq_disable();
+                gup_pgd_range(addr, end, write, pages, &nr);
+                local_irq_enable();
                 ret = nr;
         }
 
-- 
2.14.2
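The kernel-doc context in the last hunk ("get_user_pages_fast() - pin user
pages in memory") describes the calling convention that both the old and the
new fast path keep: the caller passes a user address, a page count and a
write flag, and gets back the number of pages actually pinned, each holding a
reference that must be dropped with put_page(). A minimal caller might look
like the sketch below; the pin_one_gb_one_page_at_a_time() name and the loop
itself are illustrative only, loosely mirroring the "one page at a time"
measurement quoted in the commit message rather than the actual 0-day test
case:

  #include <linux/mm.h>

  /*
   * Pin 1 GiB of user memory one page at a time, dropping each reference
   * right away. Illustrative only, not the 0-day benchmark itself.
   */
  static void pin_one_gb_one_page_at_a_time(unsigned long user_buf)
  {
          struct page *page;
          unsigned long off;

          for (off = 0; off < (1UL << 30); off += PAGE_SIZE) {
                  /* write=0: read-only pin; returns the number of pages pinned */
                  if (get_user_pages_fast(user_buf + off, 1, 0, &page) == 1)
                          put_page(page);
          }
  }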