
replace Stack-Clash fix with upstream version

since the Ubuntu / SUSE one seems to have some segfaulting
issues.
Fabian Grünbichler, 8 years ago
commit b4b8080506

+ 461 - 0
CVE-2017-100364-0001-Revert-mm-enlarge-stack-guard-gap.patch

@@ -0,0 +1,461 @@
+From 0a0e88a03210365fb82b7ab9cebfccca31aff608 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <[email protected]>
+Date: Fri, 23 Jun 2017 08:25:20 +0200
+Subject: [PATCH 1/4] Revert "mm: enlarge stack guard gap"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit fe388e5751e74b3534ee21d01b999795dfc83d39.
+
+Signed-off-by: Fabian Grünbichler <[email protected]>
+---
+ include/linux/mm.h   |  40 +++++++++++---
+ arch/ia64/mm/fault.c |   2 +-
+ fs/exec.c            |   8 +--
+ fs/proc/task_mmu.c   |  11 ++--
+ mm/gup.c             |   4 +-
+ mm/memory.c          |  35 +++++++++++-
+ mm/mmap.c            | 152 ++++++++++-----------------------------------------
+ 7 files changed, 102 insertions(+), 150 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index fbe65ceafb94..3978a350e9e4 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1366,11 +1366,39 @@ int clear_page_dirty_for_io(struct page *page);
+ 
+ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
+ 
++/* Is the vma a continuation of the stack vma above it? */
++static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
++{
++	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
++}
++
+ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+ {
+ 	return !vma->vm_ops;
+ }
+ 
++static inline int stack_guard_page_start(struct vm_area_struct *vma,
++					     unsigned long addr)
++{
++	return (vma->vm_flags & VM_GROWSDOWN) &&
++		(vma->vm_start == addr) &&
++		!vma_growsdown(vma->vm_prev, addr);
++}
++
++/* Is the vma a continuation of the stack vma below it? */
++static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
++{
++	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
++}
++
++static inline int stack_guard_page_end(struct vm_area_struct *vma,
++					   unsigned long addr)
++{
++	return (vma->vm_flags & VM_GROWSUP) &&
++		(vma->vm_end == addr) &&
++		!vma_growsup(vma->vm_next, addr);
++}
++
+ int vma_is_stack_for_current(struct vm_area_struct *vma);
+ 
+ extern unsigned long move_page_tables(struct vm_area_struct *vma,
+@@ -2111,22 +2139,16 @@ void page_cache_async_readahead(struct address_space *mapping,
+ 				pgoff_t offset,
+ 				unsigned long size);
+ 
+-extern unsigned long stack_guard_gap;
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+-extern int stack_guard_area(struct vm_area_struct *vma, unsigned long address);
+ 
+ /* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */
+ extern int expand_downwards(struct vm_area_struct *vma,
+-		unsigned long address, unsigned long gap);
+-unsigned long expandable_stack_area(struct vm_area_struct *vma,
+-		unsigned long address, unsigned long *gap);
+-
++		unsigned long address);
+ #if VM_GROWSUP
+-extern int expand_upwards(struct vm_area_struct *vma,
+-		unsigned long address, unsigned long gap);
++extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+ #else
+-  #define expand_upwards(vma, address, gap) (0)
++  #define expand_upwards(vma, address) (0)
+ #endif
+ 
+ /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
+index d5caa3cab925..fa6ad95e992e 100644
+--- a/arch/ia64/mm/fault.c
++++ b/arch/ia64/mm/fault.c
+@@ -224,7 +224,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
+ 		 */
+ 		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+ 			goto bad_area;
+-		if (expand_upwards(vma, address, 0))
++		if (expand_upwards(vma, address))
+ 			goto bad_area;
+ 	}
+ 	goto good_area;
+diff --git a/fs/exec.c b/fs/exec.c
+index 5b6383208379..1825e64f8bf3 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -205,7 +205,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+ 
+ #ifdef CONFIG_STACK_GROWSUP
+ 	if (write) {
+-		ret = expand_downwards(bprm->vma, pos, 0);
++		ret = expand_downwards(bprm->vma, pos);
+ 		if (ret < 0)
+ 			return NULL;
+ 	}
+@@ -227,12 +227,6 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+ 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ 		struct rlimit *rlim;
+ 
+-		/*
+-		 * GRWOSUP doesn't really have any gap at this stage because we grow
+-		 * the stack down now. See the expand_downwards above.
+-		 */
+-		if (!IS_ENABLED(CONFIG_STACK_GROWSUP))
+-			size -= stack_guard_gap;
+ 		acct_arg_size(bprm, size / PAGE_SIZE);
+ 
+ 		/*
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index 75f9099e2ecf..52049b595b0f 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -302,14 +302,11 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
+ 
+ 	/* We don't show the stack guard page in /proc/maps */
+ 	start = vma->vm_start;
++	if (stack_guard_page_start(vma, start))
++		start += PAGE_SIZE;
+ 	end = vma->vm_end;
+-	if (vma->vm_flags & VM_GROWSDOWN) {
+-		if (stack_guard_area(vma, start))
+-			start += stack_guard_gap;
+-	} else if (vma->vm_flags & VM_GROWSUP) {
+-		if (stack_guard_area(vma, end))
+-			end -= stack_guard_gap;
+-	}
++	if (stack_guard_page_end(vma, end))
++		end -= PAGE_SIZE;
+ 
+ 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+diff --git a/mm/gup.c b/mm/gup.c
+index 90252d1038b9..bb5f3d69f87e 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -372,7 +372,9 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+ 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ 		return -ENOENT;
+ 	/* For mm_populate(), just skip the stack guard page. */
+-	if ((*flags & FOLL_POPULATE) && stack_guard_area(vma, address))
++	if ((*flags & FOLL_POPULATE) &&
++			(stack_guard_page_start(vma, address) ||
++			 stack_guard_page_end(vma, address + PAGE_SIZE)))
+ 		return -ENOENT;
+ 	if (*flags & FOLL_WRITE)
+ 		fault_flags |= FAULT_FLAG_WRITE;
+diff --git a/mm/memory.c b/mm/memory.c
+index fca9dc75a04d..c89214451507 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2714,7 +2714,39 @@ int do_swap_page(struct vm_fault *vmf)
+ 	return ret;
+ }
+ 
++/*
++ * This is like a special single-page "expand_{down|up}wards()",
++ * except we must first make sure that 'address{-|+}PAGE_SIZE'
++ * doesn't hit another vma.
++ */
++static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
++{
++	address &= PAGE_MASK;
++	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
++		struct vm_area_struct *prev = vma->vm_prev;
++
++		/*
++		 * Is there a mapping abutting this one below?
++		 *
++		 * That's only ok if it's the same stack mapping
++		 * that has gotten split..
++		 */
++		if (prev && prev->vm_end == address)
++			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+ 
++		return expand_downwards(vma, address - PAGE_SIZE);
++	}
++	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
++		struct vm_area_struct *next = vma->vm_next;
++
++		/* As VM_GROWSDOWN but s/below/above/ */
++		if (next && next->vm_start == address + PAGE_SIZE)
++			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
++
++		return expand_upwards(vma, address + PAGE_SIZE);
++	}
++	return 0;
++}
+ 
+ /*
+  * We enter with non-exclusive mmap_sem (to exclude vma changes,
+@@ -2733,8 +2765,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
+ 		return VM_FAULT_SIGBUS;
+ 
+ 	/* Check if we need to add a guard page to the stack */
+-	if ((vma->vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) &&
+-			expand_stack(vma, vmf->address) < 0)
++	if (check_stack_guard_page(vma, vmf->address) < 0)
+ 		return VM_FAULT_SIGSEGV;
+ 
+ 	/*
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 2a3bdf11baf0..4b3a2aaa18e9 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2151,8 +2151,7 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
+  * update accounting. This is shared with both the
+  * grow-up and grow-down cases.
+  */
+-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow,
+-		unsigned long gap)
++static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	struct rlimit *rlim = current->signal->rlim;
+@@ -2165,7 +2164,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
+ 	/* Stack limit test */
+ 	actual_size = size;
+ 	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+-		actual_size -= gap;
++		actual_size -= PAGE_SIZE;
+ 	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ 		return -ENOMEM;
+ 
+@@ -2201,7 +2200,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
+  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+  * vma is the last one with address > vma->vm_end.  Have to extend vma.
+  */
+-int expand_upwards(struct vm_area_struct *vma, unsigned long address, unsigned long gap)
++int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	int error = 0;
+@@ -2209,6 +2208,12 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address, unsigned l
+ 	if (!(vma->vm_flags & VM_GROWSUP))
+ 		return -EFAULT;
+ 
++	/* Guard against wrapping around to address 0. */
++	if (address < PAGE_ALIGN(address+4))
++		address = PAGE_ALIGN(address+4);
++	else
++		return -ENOMEM;
++
+ 	/* We must make sure the anon_vma is allocated. */
+ 	if (unlikely(anon_vma_prepare(vma)))
+ 		return -ENOMEM;
+@@ -2229,7 +2234,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address, unsigned l
+ 
+ 		error = -ENOMEM;
+ 		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+-			error = acct_stack_growth(vma, size, grow, gap);
++			error = acct_stack_growth(vma, size, grow);
+ 			if (!error) {
+ 				/*
+ 				 * vma_gap_update() doesn't support concurrent
+@@ -2270,7 +2275,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address, unsigned l
+  * vma is the first one with address < vma->vm_start.  Have to extend vma.
+  */
+ int expand_downwards(struct vm_area_struct *vma,
+-				   unsigned long address, unsigned long gap)
++				   unsigned long address)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	int error;
+@@ -2300,7 +2305,7 @@ int expand_downwards(struct vm_area_struct *vma,
+ 
+ 		error = -ENOMEM;
+ 		if (grow <= vma->vm_pgoff) {
+-			error = acct_stack_growth(vma, size, grow, gap);
++			error = acct_stack_growth(vma, size, grow);
+ 			if (!error) {
+ 				/*
+ 				 * vma_gap_update() doesn't support concurrent
+@@ -2334,72 +2339,29 @@ int expand_downwards(struct vm_area_struct *vma,
+ 	return error;
+ }
+ 
+-/* enforced gap between the expanding stack and other mappings. */
+-unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+-
+ /*
+  * Note how expand_stack() refuses to expand the stack all the way to
+  * abut the next virtual mapping, *unless* that mapping itself is also
+- * a stack mapping. We want to leave room for a guard area, after all
++ * a stack mapping. We want to leave room for a guard page, after all
+  * (the guard page itself is not added here, that is done by the
+  * actual page faulting logic)
++ *
++ * This matches the behavior of the guard page logic (see mm/memory.c:
++ * check_stack_guard_page()), which only allows the guard page to be
++ * removed under these circumstances.
+  */
+ #ifdef CONFIG_STACK_GROWSUP
+-unsigned long expandable_stack_area(struct vm_area_struct *vma,
+-		unsigned long address, unsigned long *gap)
+-{
+-	struct vm_area_struct *next = vma->vm_next;
+-	unsigned long guard_gap = stack_guard_gap;
+-	unsigned long guard_addr;
+-
+-	address = ALIGN(address, PAGE_SIZE);;
+-	if (!next)
+-		goto out;
+-
+-	if (next->vm_flags & VM_GROWSUP) {
+-		guard_gap = min(guard_gap, next->vm_start - address);
+-		goto out;
+-	}
+-
+-	if (next->vm_start - address < guard_gap)
+-		return -ENOMEM;
+-out:
+-	if (TASK_SIZE - address < guard_gap)
+-		guard_gap = TASK_SIZE - address;
+-	guard_addr = address + guard_gap;
+-	*gap = guard_gap;
+-
+-	return guard_addr;
+-}
+-
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-	unsigned long gap;
+-
+-	address = expandable_stack_area(vma, address, &gap);
+-	if (IS_ERR_VALUE(address))
+-		return -ENOMEM;
+-	return expand_upwards(vma, address, gap);
+-}
+-
+-int stack_guard_area(struct vm_area_struct *vma, unsigned long address)
+-{
+ 	struct vm_area_struct *next;
+ 
+-	if (!(vma->vm_flags & VM_GROWSUP))
+-		return 0;
+-
+-	/*
+-	 * strictly speaking there is a guard gap between disjoint stacks
+-	 * but the gap is not canonical (it might be smaller) and it is
+-	 * reasonably safe to assume that we can ignore that gap for stack
+-	 * POPULATE or /proc/<pid>[s]maps purposes
+-	 */
++	address &= PAGE_MASK;
+ 	next = vma->vm_next;
+-	if (next && next->vm_flags & VM_GROWSUP)
+-		return 0;
+-
+-	return vma->vm_end - address <= stack_guard_gap;
++	if (next && next->vm_start == address + PAGE_SIZE) {
++		if (!(next->vm_flags & VM_GROWSUP))
++			return -ENOMEM;
++	}
++	return expand_upwards(vma, address);
+ }
+ 
+ struct vm_area_struct *
+@@ -2418,73 +2380,17 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
+ 	return prev;
+ }
+ #else
+-unsigned long expandable_stack_area(struct vm_area_struct *vma,
+-		unsigned long address, unsigned long *gap)
+-{
+-	struct vm_area_struct *prev = vma->vm_prev;
+-	unsigned long guard_gap = stack_guard_gap;
+-	unsigned long guard_addr;
+-
+-	address &= PAGE_MASK;
+-	if (!prev)
+-		goto out;
+-
+-	/*
+-	 * Is there a mapping abutting this one below?
+-	 *
+-	 * That's only ok if it's the same stack mapping
+-	 * that has gotten split or there is sufficient gap
+-	 * between mappings
+-	 */
+-	if (prev->vm_flags & VM_GROWSDOWN) {
+-		guard_gap = min(guard_gap, address - prev->vm_end);
+-		goto out;
+-	}
+-
+-	if (address - prev->vm_end < guard_gap)
+-		return -ENOMEM;
+-
+-out:
+-	/* make sure we won't underflow */
+-	if (address < mmap_min_addr)
+-		return -ENOMEM;
+-	if (address - mmap_min_addr < guard_gap)
+-		guard_gap = address - mmap_min_addr;
+-
+-	guard_addr = address - guard_gap;
+-	*gap = guard_gap;
+-
+-	return guard_addr;
+-}
+-
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-	unsigned long gap;
+-
+-	address = expandable_stack_area(vma, address, &gap);
+-	if (IS_ERR_VALUE(address))
+-		return -ENOMEM;
+-	return expand_downwards(vma, address, gap);
+-}
+-
+-int stack_guard_area(struct vm_area_struct *vma, unsigned long address)
+-{
+ 	struct vm_area_struct *prev;
+ 
+-	if (!(vma->vm_flags & VM_GROWSDOWN))
+-		return 0;
+-
+-	/*
+-	 * strictly speaking there is a guard gap between disjoint stacks
+-	 * but the gap is not canonical (it might be smaller) and it is
+-	 * reasonably safe to assume that we can ignore that gap for stack
+-	 * POPULATE or /proc/<pid>[s]maps purposes
+-	 */
++	address &= PAGE_MASK;
+ 	prev = vma->vm_prev;
+-	if (prev && prev->vm_flags & VM_GROWSDOWN)
+-		return 0;
+-
+-	return address - vma->vm_start < stack_guard_gap;
++	if (prev && prev->vm_end == address) {
++		if (!(prev->vm_flags & VM_GROWSDOWN))
++			return -ENOMEM;
++	}
++	return expand_downwards(vma, address);
+ }
+ 
+ struct vm_area_struct *
+-- 
+2.11.0
+
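
Patch 3 below spells out why these reverts are safe to take: a single guard page was never a real barrier, because any stack frame larger than one page can step straight over it. A purely illustrative userspace sketch (not part of this commit) of such a frame:

/*
 * Illustrative only: a single 4kB guard page below the stack does not
 * help against frames larger than one page. The first touch of a 64kB
 * alloca() frame can land 16 pages below the old stack end, never
 * faulting on the one-page guard in between.
 */
#include <alloca.h>
#include <string.h>

static void big_frame(void)
{
	char *buf = alloca(64 * 1024);	/* 16 pages in one jump */

	buf[0] = 1;			/* ~64kB below the previous stack end */
	memset(buf, 0, 64 * 1024);
}

int main(void)
{
	big_frame();
	return 0;
}

Patch 3's 256-page default gap is sized so that even multi-page jumps like this still land inside the reserved hole.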

+ 48 - 0
CVE-2017-100364-0002-Revert-mm-do-not-collapse-stack-gap-into-THP.patch

@@ -0,0 +1,48 @@
+From 0bfadbc4942a14d702d781c5b6a00ec747f4ed09 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <[email protected]>
+Date: Fri, 23 Jun 2017 08:25:04 +0200
+Subject: [PATCH 2/4] Revert "mm: do not collapse stack gap into THP"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit e9dbbeb2e0b61881d67ba7818fd4b3f996a35f0b.
+
+Signed-off-by: Fabian Grünbichler <[email protected]>
+---
+ mm/huge_memory.c | 3 ---
+ mm/khugepaged.c  | 4 ----
+ 2 files changed, 7 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 75719aa0443a..49cb70b5993d 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -660,9 +660,6 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+ 
+ 	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ 		return VM_FAULT_FALLBACK;
+-	if (stack_guard_area(vma, haddr) ||
+-			stack_guard_area(vma, haddr + HPAGE_PMD_SIZE))
+-		return VM_FAULT_FALLBACK;
+ 	if (unlikely(anon_vma_prepare(vma)))
+ 		return VM_FAULT_OOM;
+ 	if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
+diff --git a/mm/khugepaged.c b/mm/khugepaged.c
+index 16379e5943a6..77ae3239c3de 100644
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -859,10 +859,6 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+ 		return SCAN_ADDRESS_RANGE;
+ 	if (!hugepage_vma_check(vma))
+ 		return SCAN_VMA_CHECK;
+-
+-	/* never try to collapse stack gap */
+-	if (stack_guard_area(vma, hstart) || stack_guard_area(vma, hend))
+-		return SCAN_ADDRESS_RANGE;
+ 	return 0;
+ }
+ 
+-- 
+2.11.0
+

+ 940 - 0
CVE-2017-100364-0003-mm-larger-stack-guard-gap-between-vmas.patch

@@ -0,0 +1,940 @@
+From f2ca8eceb0ddcdb6b113b3edd3dfd36c171854f6 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <[email protected]>
+Date: Mon, 19 Jun 2017 04:03:24 -0700
+Subject: [PATCH 3/4] mm: larger stack guard gap, between vmas
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream.
+
+Stack guard page is a useful feature to reduce a risk of stack smashing
+into a different mapping. We have been using a single page gap which
+is sufficient to prevent having stack adjacent to a different mapping.
+But this seems to be insufficient in the light of the stack usage in
+userspace. E.g. glibc uses as large as 64kB alloca() in many commonly
+used functions. Others use constructs like gid_t buffer[NGROUPS_MAX]
+which is 256kB or stack strings with MAX_ARG_STRLEN.
+
+This will become especially dangerous for suid binaries and the default
+no limit for the stack size limit because those applications can be
+tricked to consume a large portion of the stack and a single glibc call
+could jump over the guard page. These attacks are not theoretical,
+unfortunately.
+
+Make those attacks less probable by increasing the stack guard gap
+to 1MB (on systems with 4k pages; but make it depend on the page size
+because systems with larger base pages might cap stack allocations in
+the PAGE_SIZE units) which should cover larger alloca() and VLA stack
+allocations. It is obviously not a full fix because the problem is
+somehow inherent, but it should reduce attack space a lot.
+
+One could argue that the gap size should be configurable from userspace,
+but that can be done later when somebody finds that the new 1MB is wrong
+for some special case applications.  For now, add a kernel command line
+option (stack_guard_gap) to specify the stack gap size (in page units).
+
+Implementation wise, first delete all the old code for stack guard page:
+because although we could get away with accounting one extra page in a
+stack vma, accounting a larger gap can break userspace - case in point,
+a program run with "ulimit -S -v 20000" failed when the 1MB gap was
+counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
+and strict non-overcommit mode.
+
+Instead of keeping gap inside the stack vma, maintain the stack guard
+gap as a gap between vmas: using vm_start_gap() in place of vm_start
+(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
+places which need to respect the gap - mainly arch_get_unmapped_area(),
+and the vma tree's subtree_gap support for that.
+
+Original-patch-by: Oleg Nesterov <[email protected]>
+Original-patch-by: Michal Hocko <[email protected]>
+Signed-off-by: Hugh Dickins <[email protected]>
+Acked-by: Michal Hocko <[email protected]>
+Tested-by: Helge Deller <[email protected]> # parisc
+Signed-off-by: Linus Torvalds <[email protected]>
+[wt: backport to 4.11: adjust context]
+Signed-off-by: Willy Tarreau <[email protected]>
+Signed-off-by: Greg Kroah-Hartman <[email protected]>
+[fg: backport to 4.10: adjust context]
+
+Signed-off-by: Fabian Grünbichler <[email protected]>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   7 ++
+ include/linux/mm.h                              |  53 ++++-----
+ arch/arc/mm/mmap.c                              |   2 +-
+ arch/arm/mm/mmap.c                              |   4 +-
+ arch/frv/mm/elf-fdpic.c                         |   2 +-
+ arch/mips/mm/mmap.c                             |   2 +-
+ arch/parisc/kernel/sys_parisc.c                 |  15 ++-
+ arch/powerpc/mm/hugetlbpage-radix.c             |   2 +-
+ arch/powerpc/mm/mmap.c                          |   4 +-
+ arch/powerpc/mm/slice.c                         |   2 +-
+ arch/s390/mm/mmap.c                             |   4 +-
+ arch/sh/mm/mmap.c                               |   4 +-
+ arch/sparc/kernel/sys_sparc_64.c                |   4 +-
+ arch/sparc/mm/hugetlbpage.c                     |   2 +-
+ arch/tile/mm/hugetlbpage.c                      |   2 +-
+ arch/x86/kernel/sys_x86_64.c                    |   4 +-
+ arch/x86/mm/hugetlbpage.c                       |   2 +-
+ arch/xtensa/kernel/syscall.c                    |   2 +-
+ fs/hugetlbfs/inode.c                            |   2 +-
+ fs/proc/task_mmu.c                              |   4 -
+ mm/gup.c                                        |   5 -
+ mm/memory.c                                     |  38 ------
+ mm/mmap.c                                       | 149 ++++++++++++++----------
+ 23 files changed, 152 insertions(+), 163 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index d034a76bcae6..9eb4da6e980a 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -3744,6 +3744,13 @@
+ 	spia_pedr=
+ 	spia_peddr=
+ 
++	stack_guard_gap=	[MM]
++			override the default stack gap protection. The value
++			is in page units and it defines how many pages prior
++			to (for stacks growing down) resp. after (for stacks
++			growing up) the main stack are reserved for no other
++			mapping. Default value is 256 pages.
++
+ 	stacktrace	[FTRACE]
+ 			Enabled the stack tracer on boot up.
+ 
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 3978a350e9e4..705bf34d0805 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1366,39 +1366,11 @@ int clear_page_dirty_for_io(struct page *page);
+ 
+ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
+ 
+-/* Is the vma a continuation of the stack vma above it? */
+-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
+-{
+-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+-}
+-
+ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+ {
+ 	return !vma->vm_ops;
+ }
+ 
+-static inline int stack_guard_page_start(struct vm_area_struct *vma,
+-					     unsigned long addr)
+-{
+-	return (vma->vm_flags & VM_GROWSDOWN) &&
+-		(vma->vm_start == addr) &&
+-		!vma_growsdown(vma->vm_prev, addr);
+-}
+-
+-/* Is the vma a continuation of the stack vma below it? */
+-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+-{
+-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+-}
+-
+-static inline int stack_guard_page_end(struct vm_area_struct *vma,
+-					   unsigned long addr)
+-{
+-	return (vma->vm_flags & VM_GROWSUP) &&
+-		(vma->vm_end == addr) &&
+-		!vma_growsup(vma->vm_next, addr);
+-}
+-
+ int vma_is_stack_for_current(struct vm_area_struct *vma);
+ 
+ extern unsigned long move_page_tables(struct vm_area_struct *vma,
+@@ -2139,6 +2111,7 @@ void page_cache_async_readahead(struct address_space *mapping,
+ 				pgoff_t offset,
+ 				unsigned long size);
+ 
++extern unsigned long stack_guard_gap;
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+ 
+@@ -2167,6 +2140,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
+ 	return vma;
+ }
+ 
++static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
++{
++	unsigned long vm_start = vma->vm_start;
++
++	if (vma->vm_flags & VM_GROWSDOWN) {
++		vm_start -= stack_guard_gap;
++		if (vm_start > vma->vm_start)
++			vm_start = 0;
++	}
++	return vm_start;
++}
++
++static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
++{
++	unsigned long vm_end = vma->vm_end;
++
++	if (vma->vm_flags & VM_GROWSUP) {
++		vm_end += stack_guard_gap;
++		if (vm_end < vma->vm_end)
++			vm_end = -PAGE_SIZE;
++	}
++	return vm_end;
++}
++
+ static inline unsigned long vma_pages(struct vm_area_struct *vma)
+ {
+ 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
+index 2e06d56e987b..cf4ae6958240 100644
+--- a/arch/arc/mm/mmap.c
++++ b/arch/arc/mm/mmap.c
+@@ -64,7 +64,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
+index 66353caa35b9..641334ebf46d 100644
+--- a/arch/arm/mm/mmap.c
++++ b/arch/arm/mm/mmap.c
+@@ -89,7 +89,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -140,7 +140,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 			addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-				(!vma || addr + len <= vma->vm_start))
++				(!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
+index 836f14707a62..efa59f1f8022 100644
+--- a/arch/frv/mm/elf-fdpic.c
++++ b/arch/frv/mm/elf-fdpic.c
+@@ -74,7 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(current->mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			goto success;
+ 	}
+ 
+diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
+index d08ea3ff0f53..a44052c05f93 100644
+--- a/arch/mips/mm/mmap.c
++++ b/arch/mips/mm/mmap.c
+@@ -92,7 +92,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
+index bf3294171230..5fb3f0ef2906 100644
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ 	struct mm_struct *mm = current->mm;
+-	struct vm_area_struct *vma;
++	struct vm_area_struct *vma, *prev;
+ 	unsigned long task_size = TASK_SIZE;
+ 	int do_color_align, last_mmap;
+ 	struct vm_unmapped_area_info info;
+@@ -115,9 +115,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		else
+ 			addr = PAGE_ALIGN(addr);
+ 
+-		vma = find_vma(mm, addr);
++		vma = find_vma_prev(mm, addr, &prev);
+ 		if (task_size - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)) &&
++		    (!prev || addr >= vm_end_gap(prev)))
+ 			goto found_addr;
+ 	}
+ 
+@@ -141,7 +142,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 			  const unsigned long len, const unsigned long pgoff,
+ 			  const unsigned long flags)
+ {
+-	struct vm_area_struct *vma;
++	struct vm_area_struct *vma, *prev;
+ 	struct mm_struct *mm = current->mm;
+ 	unsigned long addr = addr0;
+ 	int do_color_align, last_mmap;
+@@ -175,9 +176,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
+ 		else
+ 			addr = PAGE_ALIGN(addr);
+-		vma = find_vma(mm, addr);
++
++		vma = find_vma_prev(mm, addr, &prev);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)) &&
++		    (!prev || addr >= vm_end_gap(prev)))
+ 			goto found_addr;
+ 	}
+ 
+diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
+index 35254a678456..a2b2d97f7eda 100644
+--- a/arch/powerpc/mm/hugetlbpage-radix.c
++++ b/arch/powerpc/mm/hugetlbpage-radix.c
+@@ -65,7 +65,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ 		addr = ALIGN(addr, huge_page_size(h));
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 	/*
+diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
+index 2f1e44362198..5bc2845cddf4 100644
+--- a/arch/powerpc/mm/mmap.c
++++ b/arch/powerpc/mm/mmap.c
+@@ -106,7 +106,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -142,7 +142,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-				(!vma || addr + len <= vma->vm_start))
++				(!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
+index 2b27458902ee..c4d5c9c61e0f 100644
+--- a/arch/powerpc/mm/slice.c
++++ b/arch/powerpc/mm/slice.c
+@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+ 	if ((mm->task_size - len) < addr)
+ 		return 0;
+ 	vma = find_vma(mm, addr);
+-	return (!vma || (addr + len) <= vma->vm_start);
++	return (!vma || (addr + len) <= vm_start_gap(vma));
+ }
+ 
+ static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
+index eb9df2822da1..812368f274c9 100644
+--- a/arch/s390/mm/mmap.c
++++ b/arch/s390/mm/mmap.c
+@@ -98,7 +98,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -136,7 +136,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-				(!vma || addr + len <= vma->vm_start))
++				(!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
+index 6777177807c2..7df7d5944188 100644
+--- a/arch/sh/mm/mmap.c
++++ b/arch/sh/mm/mmap.c
+@@ -63,7 +63,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
+index 884c70331345..f39dd87b77c4 100644
+--- a/arch/sparc/kernel/sys_sparc_64.c
++++ b/arch/sparc/kernel/sys_sparc_64.c
+@@ -118,7 +118,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (task_size - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -181,7 +181,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 
+ 		vma = find_vma(mm, addr);
+ 		if (task_size - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
+index 988acc8b1b80..58cde8d9be8a 100644
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -116,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ 		addr = ALIGN(addr, HPAGE_SIZE);
+ 		vma = find_vma(mm, addr);
+ 		if (task_size - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 	if (mm->get_unmapped_area == arch_get_unmapped_area)
+diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
+index 77ceaa343fce..67508b249ede 100644
+--- a/arch/tile/mm/hugetlbpage.c
++++ b/arch/tile/mm/hugetlbpage.c
+@@ -232,7 +232,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ 		addr = ALIGN(addr, huge_page_size(h));
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
+index a55ed63b9f91..1119414ab419 100644
+--- a/arch/x86/kernel/sys_x86_64.c
++++ b/arch/x86/kernel/sys_x86_64.c
+@@ -140,7 +140,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (end - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 		addr = PAGE_ALIGN(addr);
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-				(!vma || addr + len <= vma->vm_start))
++				(!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
+index 2ae8584b44c7..fe342e8ed529 100644
+--- a/arch/x86/mm/hugetlbpage.c
++++ b/arch/x86/mm/hugetlbpage.c
+@@ -144,7 +144,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ 		addr = ALIGN(addr, huge_page_size(h));
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 	if (mm->get_unmapped_area == arch_get_unmapped_area)
+diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
+index d3fd100dffc9..dceb0ee37db3 100644
+--- a/arch/xtensa/kernel/syscall.c
++++ b/arch/xtensa/kernel/syscall.c
+@@ -87,7 +87,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		/* At this point:  (!vmm || addr < vmm->vm_end). */
+ 		if (TASK_SIZE - len < addr)
+ 			return -ENOMEM;
+-		if (!vmm || addr + len <= vmm->vm_start)
++		if (!vmm || addr + len <= vm_start_gap(vmm))
+ 			return addr;
+ 		addr = vmm->vm_end;
+ 		if (flags & MAP_SHARED)
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index 54de77e78775..19a787eb8a00 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -191,7 +191,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ 		addr = ALIGN(addr, huge_page_size(h));
+ 		vma = find_vma(mm, addr);
+ 		if (TASK_SIZE - len >= addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)))
+ 			return addr;
+ 	}
+ 
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index 52049b595b0f..5e6bab59e219 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -302,11 +302,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
+ 
+ 	/* We don't show the stack guard page in /proc/maps */
+ 	start = vma->vm_start;
+-	if (stack_guard_page_start(vma, start))
+-		start += PAGE_SIZE;
+ 	end = vma->vm_end;
+-	if (stack_guard_page_end(vma, end))
+-		end -= PAGE_SIZE;
+ 
+ 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+diff --git a/mm/gup.c b/mm/gup.c
+index bb5f3d69f87e..bdd74b782b6e 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -371,11 +371,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+ 	/* mlock all present pages, but do not fault in new pages */
+ 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ 		return -ENOENT;
+-	/* For mm_populate(), just skip the stack guard page. */
+-	if ((*flags & FOLL_POPULATE) &&
+-			(stack_guard_page_start(vma, address) ||
+-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
+-		return -ENOENT;
+ 	if (*flags & FOLL_WRITE)
+ 		fault_flags |= FAULT_FLAG_WRITE;
+ 	if (*flags & FOLL_REMOTE)
+diff --git a/mm/memory.c b/mm/memory.c
+index c89214451507..6d54a9b15520 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2715,40 +2715,6 @@ int do_swap_page(struct vm_fault *vmf)
+ }
+ 
+ /*
+- * This is like a special single-page "expand_{down|up}wards()",
+- * except we must first make sure that 'address{-|+}PAGE_SIZE'
+- * doesn't hit another vma.
+- */
+-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+-{
+-	address &= PAGE_MASK;
+-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+-		struct vm_area_struct *prev = vma->vm_prev;
+-
+-		/*
+-		 * Is there a mapping abutting this one below?
+-		 *
+-		 * That's only ok if it's the same stack mapping
+-		 * that has gotten split..
+-		 */
+-		if (prev && prev->vm_end == address)
+-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+-
+-		return expand_downwards(vma, address - PAGE_SIZE);
+-	}
+-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+-		struct vm_area_struct *next = vma->vm_next;
+-
+-		/* As VM_GROWSDOWN but s/below/above/ */
+-		if (next && next->vm_start == address + PAGE_SIZE)
+-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+-
+-		return expand_upwards(vma, address + PAGE_SIZE);
+-	}
+-	return 0;
+-}
+-
+-/*
+  * We enter with non-exclusive mmap_sem (to exclude vma changes,
+  * but allow concurrent faults), and pte mapped but not yet locked.
+  * We return with mmap_sem still held, but pte unmapped and unlocked.
+@@ -2764,10 +2730,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
+ 	if (vma->vm_flags & VM_SHARED)
+ 		return VM_FAULT_SIGBUS;
+ 
+-	/* Check if we need to add a guard page to the stack */
+-	if (check_stack_guard_page(vma, vmf->address) < 0)
+-		return VM_FAULT_SIGSEGV;
+-
+ 	/*
+ 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
+ 	 * pte_offset_map() on pmds where a huge pmd might be created
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 4b3a2aaa18e9..4acc20fc5c81 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ 	unsigned long retval;
+ 	unsigned long newbrk, oldbrk;
+ 	struct mm_struct *mm = current->mm;
++	struct vm_area_struct *next;
+ 	unsigned long min_brk;
+ 	bool populate;
+ 
+@@ -228,7 +229,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ 	}
+ 
+ 	/* Check against existing mmap mappings. */
+-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
++	next = find_vma(mm, oldbrk);
++	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
+ 		goto out;
+ 
+ 	/* Ok, looks good - let it rip. */
+@@ -251,10 +253,22 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ 
+ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+ {
+-	unsigned long max, subtree_gap;
+-	max = vma->vm_start;
+-	if (vma->vm_prev)
+-		max -= vma->vm_prev->vm_end;
++	unsigned long max, prev_end, subtree_gap;
++
++	/*
++	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
++	 * allow two stack_guard_gaps between them here, and when choosing
++	 * an unmapped area; whereas when expanding we only require one.
++	 * That's a little inconsistent, but keeps the code here simpler.
++	 */
++	max = vm_start_gap(vma);
++	if (vma->vm_prev) {
++		prev_end = vm_end_gap(vma->vm_prev);
++		if (max > prev_end)
++			max -= prev_end;
++		else
++			max = 0;
++	}
+ 	if (vma->vm_rb.rb_left) {
+ 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
+ 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
+@@ -350,7 +364,7 @@ static void validate_mm(struct mm_struct *mm)
+ 			anon_vma_unlock_read(anon_vma);
+ 		}
+ 
+-		highest_address = vma->vm_end;
++		highest_address = vm_end_gap(vma);
+ 		vma = vma->vm_next;
+ 		i++;
+ 	}
+@@ -539,7 +553,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+ 	if (vma->vm_next)
+ 		vma_gap_update(vma->vm_next);
+ 	else
+-		mm->highest_vm_end = vma->vm_end;
++		mm->highest_vm_end = vm_end_gap(vma);
+ 
+ 	/*
+ 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
+@@ -854,7 +868,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 			vma_gap_update(vma);
+ 		if (end_changed) {
+ 			if (!next)
+-				mm->highest_vm_end = end;
++				mm->highest_vm_end = vm_end_gap(vma);
+ 			else if (!adjust_next)
+ 				vma_gap_update(next);
+ 		}
+@@ -939,7 +953,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ 			 * mm->highest_vm_end doesn't need any update
+ 			 * in remove_next == 1 case.
+ 			 */
+-			VM_WARN_ON(mm->highest_vm_end != end);
++			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
+ 		}
+ 	}
+ 	if (insert && file)
+@@ -1783,7 +1797,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+ 
+ 	while (true) {
+ 		/* Visit left subtree if it looks promising */
+-		gap_end = vma->vm_start;
++		gap_end = vm_start_gap(vma);
+ 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
+ 			struct vm_area_struct *left =
+ 				rb_entry(vma->vm_rb.rb_left,
+@@ -1794,7 +1808,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+ 			}
+ 		}
+ 
+-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ check_current:
+ 		/* Check if current node has a suitable gap */
+ 		if (gap_start > high_limit)
+@@ -1821,8 +1835,8 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+ 			vma = rb_entry(rb_parent(prev),
+ 				       struct vm_area_struct, vm_rb);
+ 			if (prev == vma->vm_rb.rb_left) {
+-				gap_start = vma->vm_prev->vm_end;
+-				gap_end = vma->vm_start;
++				gap_start = vm_end_gap(vma->vm_prev);
++				gap_end = vm_start_gap(vma);
+ 				goto check_current;
+ 			}
+ 		}
+@@ -1886,7 +1900,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+ 
+ 	while (true) {
+ 		/* Visit right subtree if it looks promising */
+-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+ 			struct vm_area_struct *right =
+ 				rb_entry(vma->vm_rb.rb_right,
+@@ -1899,7 +1913,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+ 
+ check_current:
+ 		/* Check if current node has a suitable gap */
+-		gap_end = vma->vm_start;
++		gap_end = vm_start_gap(vma);
+ 		if (gap_end < low_limit)
+ 			return -ENOMEM;
+ 		if (gap_start <= high_limit && gap_end - gap_start >= length)
+@@ -1925,7 +1939,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+ 				       struct vm_area_struct, vm_rb);
+ 			if (prev == vma->vm_rb.rb_right) {
+ 				gap_start = vma->vm_prev ?
+-					vma->vm_prev->vm_end : 0;
++					vm_end_gap(vma->vm_prev) : 0;
+ 				goto check_current;
+ 			}
+ 		}
+@@ -1963,7 +1977,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 		unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ 	struct mm_struct *mm = current->mm;
+-	struct vm_area_struct *vma;
++	struct vm_area_struct *vma, *prev;
+ 	struct vm_unmapped_area_info info;
+ 
+ 	if (len > TASK_SIZE - mmap_min_addr)
+@@ -1974,9 +1988,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ 
+ 	if (addr) {
+ 		addr = PAGE_ALIGN(addr);
+-		vma = find_vma(mm, addr);
++		vma = find_vma_prev(mm, addr, &prev);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-		    (!vma || addr + len <= vma->vm_start))
++		    (!vma || addr + len <= vm_start_gap(vma)) &&
++		    (!prev || addr >= vm_end_gap(prev)))
+ 			return addr;
+ 	}
+ 
+@@ -1999,7 +2014,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 			  const unsigned long len, const unsigned long pgoff,
+ 			  const unsigned long flags)
+ {
+-	struct vm_area_struct *vma;
++	struct vm_area_struct *vma, *prev;
+ 	struct mm_struct *mm = current->mm;
+ 	unsigned long addr = addr0;
+ 	struct vm_unmapped_area_info info;
+@@ -2014,9 +2029,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ 	/* requesting a specific address */
+ 	if (addr) {
+ 		addr = PAGE_ALIGN(addr);
+-		vma = find_vma(mm, addr);
++		vma = find_vma_prev(mm, addr, &prev);
+ 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+-				(!vma || addr + len <= vma->vm_start))
++				(!vma || addr + len <= vm_start_gap(vma)) &&
++				(!prev || addr >= vm_end_gap(prev)))
+ 			return addr;
+ 	}
+ 
+@@ -2151,21 +2167,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
+  * update accounting. This is shared with both the
+  * grow-up and grow-down cases.
+  */
+-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
++static int acct_stack_growth(struct vm_area_struct *vma,
++			     unsigned long size, unsigned long grow)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	struct rlimit *rlim = current->signal->rlim;
+-	unsigned long new_start, actual_size;
++	unsigned long new_start;
+ 
+ 	/* address space limit tests */
+ 	if (!may_expand_vm(mm, vma->vm_flags, grow))
+ 		return -ENOMEM;
+ 
+ 	/* Stack limit test */
+-	actual_size = size;
+-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+-		actual_size -= PAGE_SIZE;
+-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
++	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ 		return -ENOMEM;
+ 
+ 	/* mlock limit tests */
+@@ -2203,17 +2217,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
+ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
++	struct vm_area_struct *next;
++	unsigned long gap_addr;
+ 	int error = 0;
+ 
+ 	if (!(vma->vm_flags & VM_GROWSUP))
+ 		return -EFAULT;
+ 
+ 	/* Guard against wrapping around to address 0. */
+-	if (address < PAGE_ALIGN(address+4))
+-		address = PAGE_ALIGN(address+4);
+-	else
++	address &= PAGE_MASK;
++	address += PAGE_SIZE;
++	if (!address)
+ 		return -ENOMEM;
+ 
++	/* Enforce stack_guard_gap */
++	gap_addr = address + stack_guard_gap;
++	if (gap_addr < address)
++		return -ENOMEM;
++	next = vma->vm_next;
++	if (next && next->vm_start < gap_addr) {
++		if (!(next->vm_flags & VM_GROWSUP))
++			return -ENOMEM;
++		/* Check that both stack segments have the same anon_vma? */
++	}
++
+ 	/* We must make sure the anon_vma is allocated. */
+ 	if (unlikely(anon_vma_prepare(vma)))
+ 		return -ENOMEM;
+@@ -2257,7 +2284,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ 				if (vma->vm_next)
+ 					vma_gap_update(vma->vm_next);
+ 				else
+-					mm->highest_vm_end = address;
++					mm->highest_vm_end = vm_end_gap(vma);
+ 				spin_unlock(&mm->page_table_lock);
+ 
+ 				perf_event_mmap(vma);
+@@ -2278,6 +2305,8 @@ int expand_downwards(struct vm_area_struct *vma,
+ 				   unsigned long address)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
++	struct vm_area_struct *prev;
++	unsigned long gap_addr;
+ 	int error;
+ 
+ 	address &= PAGE_MASK;
+@@ -2285,6 +2314,17 @@ int expand_downwards(struct vm_area_struct *vma,
+ 	if (error)
+ 		return error;
+ 
++	/* Enforce stack_guard_gap */
++	gap_addr = address - stack_guard_gap;
++	if (gap_addr > address)
++		return -ENOMEM;
++	prev = vma->vm_prev;
++	if (prev && prev->vm_end > gap_addr) {
++		if (!(prev->vm_flags & VM_GROWSDOWN))
++			return -ENOMEM;
++		/* Check that both stack segments have the same anon_vma? */
++	}
++
+ 	/* We must make sure the anon_vma is allocated. */
+ 	if (unlikely(anon_vma_prepare(vma)))
+ 		return -ENOMEM;
+@@ -2339,28 +2379,25 @@ int expand_downwards(struct vm_area_struct *vma,
+ 	return error;
+ }
+ 
+-/*
+- * Note how expand_stack() refuses to expand the stack all the way to
+- * abut the next virtual mapping, *unless* that mapping itself is also
+- * a stack mapping. We want to leave room for a guard page, after all
+- * (the guard page itself is not added here, that is done by the
+- * actual page faulting logic)
+- *
+- * This matches the behavior of the guard page logic (see mm/memory.c:
+- * check_stack_guard_page()), which only allows the guard page to be
+- * removed under these circumstances.
+- */
++/* enforced gap between the expanding stack and other mappings. */
++unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
++
++static int __init cmdline_parse_stack_guard_gap(char *p)
++{
++	unsigned long val;
++	char *endptr;
++
++	val = simple_strtoul(p, &endptr, 10);
++	if (!*endptr)
++		stack_guard_gap = val << PAGE_SHIFT;
++
++	return 0;
++}
++__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
++
+ #ifdef CONFIG_STACK_GROWSUP
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-	struct vm_area_struct *next;
+-
+-	address &= PAGE_MASK;
+-	next = vma->vm_next;
+-	if (next && next->vm_start == address + PAGE_SIZE) {
+-		if (!(next->vm_flags & VM_GROWSUP))
+-			return -ENOMEM;
+-	}
+ 	return expand_upwards(vma, address);
+ }
+ 
+@@ -2382,14 +2419,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
+ #else
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+-	struct vm_area_struct *prev;
+-
+-	address &= PAGE_MASK;
+-	prev = vma->vm_prev;
+-	if (prev && prev->vm_end == address) {
+-		if (!(prev->vm_flags & VM_GROWSDOWN))
+-			return -ENOMEM;
+-	}
+ 	return expand_downwards(vma, address);
+ }
+ 
+@@ -2487,7 +2516,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
+ 		vma->vm_prev = prev;
+ 		vma_gap_update(vma);
+ 	} else
+-		mm->highest_vm_end = prev ? prev->vm_end : 0;
++		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+ 	tail_vma->vm_next = NULL;
+ 
+ 	/* Kill the cache */
+-- 
+2.11.0
+
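
The heart of patch 3 is the vm_start_gap()/vm_end_gap() pair: instead of accounting a guard page inside the stack vma, the gap lives between vmas, and only the placement paths (arch_get_unmapped_area() and the rbtree subtree_gap bookkeeping) have to respect it. A minimal userspace sketch of the same arithmetic, with kernel types swapped for a small struct; the flag values and the 256-page default come from the patch, and the comparisons after the +/- catch unsigned wrap exactly as the hunks above do:

/*
 * Compilable sketch of vm_start_gap()/vm_end_gap(): a stack near
 * address 0 clamps its guarded start to 0, a grows-up stack near the
 * top of the address space clamps its guarded end to -PAGE_SIZE.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define VM_GROWSDOWN	0x1UL
#define VM_GROWSUP	0x2UL

static unsigned long stack_guard_gap = 256UL << PAGE_SHIFT;	/* 1MB on 4kB pages */

struct vma {
	unsigned long vm_start, vm_end, vm_flags;
};

static unsigned long vm_start_gap(const struct vma *vma)
{
	unsigned long vm_start = vma->vm_start;

	if (vma->vm_flags & VM_GROWSDOWN) {
		vm_start -= stack_guard_gap;
		if (vm_start > vma->vm_start)	/* underflowed past 0 */
			vm_start = 0;
	}
	return vm_start;
}

static unsigned long vm_end_gap(const struct vma *vma)
{
	unsigned long vm_end = vma->vm_end;

	if (vma->vm_flags & VM_GROWSUP) {
		vm_end += stack_guard_gap;
		if (vm_end < vma->vm_end)	/* overflowed past ~0UL */
			vm_end = -PAGE_SIZE;
	}
	return vm_end;
}

int main(void)
{
	struct vma stack = { 0x7ffffff00000UL, 0x7ffffffff000UL, VM_GROWSDOWN };
	struct vma rbs   = { 0x10000UL, 0x20000UL, VM_GROWSUP };

	/* New mappings must end at or below the guarded start, not vm_start. */
	printf("vm_start %#lx, guarded start %#lx\n",
	       stack.vm_start, vm_start_gap(&stack));
	printf("vm_end   %#lx, guarded end   %#lx\n",
	       rbs.vm_end, vm_end_gap(&rbs));
	return 0;
}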

+ 5 - 5
mm-fix-new-crash-in-unmapped_area_topdown.patch → CVE-2017-100364-0004-mm-fix-new-crash-in-unmapped_area_topdown.patch

@@ -1,7 +1,7 @@
-From f4cb767d76cf7ee72f97dd76f6cfa6c76a5edc89 Mon Sep 17 00:00:00 2001
+From e9a8b9465f735bda98313627c38300796694f188 Mon Sep 17 00:00:00 2001
 From: Hugh Dickins <[email protected]>
 Date: Tue, 20 Jun 2017 02:10:44 -0700
-Subject: [PATCH] mm: fix new crash in unmapped_area_topdown()
+Subject: [PATCH 4/4] mm: fix new crash in unmapped_area_topdown()
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -26,10 +26,10 @@ Signed-off-by: Fabian Grünbichler <[email protected]>
  1 file changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/mm/mmap.c b/mm/mmap.c
-index 8e07976d5e47..290b77d9a01e 100644
+index 4acc20fc5c81..ece0e5a2a25b 100644
 --- a/mm/mmap.c
 +++ b/mm/mmap.c
-@@ -1817,7 +1817,8 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+@@ -1813,7 +1813,8 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  		/* Check if current node has a suitable gap */
  		if (gap_start > high_limit)
  			return -ENOMEM;
@@ -39,7 +39,7 @@ index 8e07976d5e47..290b77d9a01e 100644
  			goto found;
  
  		/* Visit right subtree if it looks promising */
-@@ -1920,7 +1921,8 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+@@ -1916,7 +1917,8 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
  		gap_end = vm_start_gap(vma);
  		if (gap_end < low_limit)
  			return -ENOMEM;
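
Patch 4 (upstream f4cb767d76cf, here rebased onto the new 0003 patch, hence the refreshed From hash and hunk offsets) exists because gap_end now comes from vm_start_gap() and can legitimately end up below gap_start; with unsigned arithmetic the old "gap_end - gap_start >= length" test then wraps around and accepts a gap that does not exist. A toy reproduction of the arithmetic, with invented addresses:

/*
 * Sketch of the failure mode patch 4 closes: when the guard gap pushes
 * vm_start_gap(vma) below vm_end_gap(prev), the unsigned subtraction
 * wraps to a huge value and the old suitability test passes.
 */
#include <stdio.h>

int main(void)
{
	unsigned long gap_start = 0x7f0000200000UL;	/* vm_end_gap(prev) */
	unsigned long gap_end   = 0x7f0000100000UL;	/* vm_start_gap(vma), below gap_start */
	unsigned long length    = 0x1000UL;

	printf("gap_end - gap_start = %#lx (wrapped)\n", gap_end - gap_start);
	printf("old test: %d\n", gap_end - gap_start >= length);	/* 1: bogus gap accepted */
	printf("fixed:    %d\n", gap_end > gap_start &&
				 gap_end - gap_start >= length);	/* 0 */
	return 0;
}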

+ 4 - 1
Makefile

@@ -243,7 +243,10 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNEL_SRC_SUBMODULE} | submodules
 	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-9242-ipv6-fix-out-of-bound-writes-in-__ip6_append_data.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../pinctl-amd-ryzen-01-make-use-of-raw_spinlock-variants.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../pinctl-amd-ryzen-02-Use-regular-interrupt-instead-of-chained.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../mm-fix-new-crash-in-unmapped_area_topdown.patch
+	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-100364-0001-Revert-mm-enlarge-stack-guard-gap.patch
+	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-100364-0002-Revert-mm-do-not-collapse-stack-gap-into-THP.patch
+	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-100364-0003-mm-larger-stack-guard-gap-between-vmas.patch
+	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-100364-0004-mm-fix-new-crash-in-unmapped_area_topdown.patch
 	sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
 	touch $@
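
After the Makefile change wires the four patches into the build, the resulting kernel's behaviour can be spot-checked from userspace: hint an mmap() a few pages below the stack and see where the kernel actually puts it. A rough probe, assuming the usual Linux /proc/self/maps format and 4kB pages:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	char line[512];
	unsigned long start = 0, end = 0;

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "[stack]")) {
			sscanf(line, "%lx-%lx", &start, &end);
			break;
		}
	}
	fclose(f);
	if (!start)
		return 1;

	/* Hint 16 pages below the stack vma, without MAP_FIXED: a patched
	 * kernel rejects hints inside the guard gap and picks another spot. */
	void *hint = (void *)(start - 16 * 4096);
	void *p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	printf("stack vma: %#lx-%#lx\n", start, end);
	printf("hint:      %p\n", hint);
	printf("placed at: %p\n", p);
	return 0;
}

On a kernel with this series applied, the returned address should sit at least stack_guard_gap (1MB by default) away from the stack vma, or in a different area entirely.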