1 From 3982d0807e02909957990f194c5ed2ffb6ab6c35 Mon Sep 17 00:00:00 2001
2 From: Hugh Dickins <hughd@google.com>
3 Date: Mon, 19 Jun 2017 04:03:24 -0700
4 Subject: [PATCH 1/3] mm: larger stack guard gap, between vmas
6 commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream.
8 Stack guard page is a useful feature to reduce a risk of stack smashing
9 into a different mapping. We have been using a single page gap which
10 is sufficient to prevent having stack adjacent to a different mapping.
11 But this seems to be insufficient in the light of the stack usage in
12 userspace. E.g. glibc uses as large as 64kB alloca() in many commonly
13 used functions. Others use constructs like gid_t buffer[NGROUPS_MAX]
14 which is 256kB, or stack strings with MAX_ARG_STRLEN.
16 This will become especially dangerous for suid binaries and the default
17 no limit for the stack size limit because those applications can be
18 tricked to consume a large portion of the stack and a single glibc call
19 could jump over the guard page. These attacks are not theoretical,
22 Make those attacks less probable by increasing the stack guard gap
23 to 1MB (on systems with 4k pages; but make it depend on the page size
24 because systems with larger base pages might cap stack allocations in
25 PAGE_SIZE units), which should cover larger alloca() and VLA stack
26 allocations. It is obviously not a full fix because the problem is
27 somewhat inherent, but it should reduce the attack space a lot.
29 One could argue that the gap size should be configurable from userspace,
30 but that can be done later when somebody finds that the new 1MB is wrong
31 for some special case applications. For now, add a kernel command line
32 option (stack_guard_gap) to specify the stack gap size (in page units).
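To make the units concrete (an illustrative userspace sketch, not part
of the kernel change): with 4kB pages the default of 256 pages works out
to 1MB, and booting with e.g. stack_guard_gap=1024 would widen the gap
to 4MB.

	#include <stdio.h>

	int main(void)
	{
		unsigned long page_shift = 12;                 /* 4kB pages */
		unsigned long def_gap  = 256UL  << page_shift; /* default */
		unsigned long boot_gap = 1024UL << page_shift; /* stack_guard_gap=1024 */

		printf("default gap:   %lu kB\n", def_gap  >> 10); /* 1024 kB */
		printf("boot-time gap: %lu kB\n", boot_gap >> 10); /* 4096 kB */
		return 0;
	}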
34 Implementation-wise, first delete all the old code for the stack guard page:
35 because although we could get away with accounting one extra page in a
36 stack vma, accounting a larger gap can break userspace - case in point,
37 a program run with "ulimit -S -v 20000" failed when the 1MB gap was
38 counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
39 and strict non-overcommit mode.
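To see why charging the gap against RLIMIT_AS breaks that case
(illustrative numbers only, in kB): the process below fits comfortably
until the 1MB gap is billed against its address-space limit.

	#include <stdio.h>

	int main(void)
	{
		unsigned long rlimit_as = 20000; /* ulimit -S -v 20000 */
		unsigned long mapped    = 19500; /* address space really in use */
		unsigned long gap       = 1024;  /* 1MB guard gap, never touchable */

		printf("gap unaccounted: %s\n", mapped <= rlimit_as ? "fits" : "fails");
		printf("gap accounted:   %s\n", mapped + gap <= rlimit_as ? "fits" : "fails");
		return 0;
	}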
41 Instead of keeping gap inside the stack vma, maintain the stack guard
42 gap as a gap between vmas: using vm_start_gap() in place of vm_start
43 (or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
44 places which need to respect the gap - mainly arch_get_unmapped_area(),
45 and the vma tree's subtree_gap support for that.
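Condensed, the check those call sites converge on looks like the sketch
below (illustration only -- the hunks that follow inline it, and the
simpler ones test just the next vma, while parisc and the generic code
also test the previous one):

	static bool fits_with_gap(struct mm_struct *mm,
				  unsigned long addr, unsigned long len)
	{
		struct vm_area_struct *vma, *prev;

		vma = find_vma_prev(mm, addr, &prev);
		return TASK_SIZE - len >= addr &&
		       (!vma || addr + len <= vm_start_gap(vma)) &&
		       (!prev || addr >= vm_end_gap(prev));
	}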
47 Original-patch-by: Oleg Nesterov <oleg@redhat.com>
48 Original-patch-by: Michal Hocko <mhocko@suse.com>
49 Signed-off-by: Hugh Dickins <hughd@google.com>
50 Acked-by: Michal Hocko <mhocko@suse.com>
51 Tested-by: Helge Deller <deller@gmx.de> # parisc
52 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
53 [wt: backport to 4.11: adjust context]
54 [wt: backport to 4.9: adjust context ; kernel doc was not in admin-guide]
55 [wt: backport to 4.4: adjust context ; drop ppc hugetlb_radix changes]
56 Signed-off-by: Willy Tarreau <w@1wt.eu>
57 [gkh: minor build fixes for 4.4]
58 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
60 Documentation/kernel-parameters.txt |   7 ++
61 arch/arc/mm/mmap.c                  |   2 +-
62 arch/arm/mm/mmap.c                  |   4 +-
63 arch/frv/mm/elf-fdpic.c             |   2 +-
64 arch/mips/mm/mmap.c                 |   2 +-
65 arch/parisc/kernel/sys_parisc.c     |  15 ++--
66 arch/powerpc/mm/slice.c             |   2 +-
67 arch/s390/mm/mmap.c                 |   4 +-
68 arch/sh/mm/mmap.c                   |   4 +-
69 arch/sparc/kernel/sys_sparc_64.c    |   4 +-
70 arch/sparc/mm/hugetlbpage.c         |   2 +-
71 arch/tile/mm/hugetlbpage.c          |   2 +-
72 arch/x86/kernel/sys_x86_64.c        |   4 +-
73 arch/x86/mm/hugetlbpage.c           |   2 +-
74 arch/xtensa/kernel/syscall.c        |   2 +-
75 fs/hugetlbfs/inode.c                |   2 +-
76 fs/proc/task_mmu.c                  |   4 -
77 include/linux/mm.h                  |  53 ++++++-------
78 mm/gup.c                            |   5 -----
79 mm/memory.c                         |  38 ---------
80 mm/mmap.c                           | 149 +++++++++++++++++++++---------------
81 21 files changed, 149 insertions(+), 160 deletions(-)
83 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
84 index c360f80..9738c8b 100644
85 --- a/Documentation/kernel-parameters.txt
86 +++ b/Documentation/kernel-parameters.txt
87 @@ -3576,6 +3576,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
91 + stack_guard_gap= [MM]
92 + override the default stack gap protection. The value
93 + is in page units and it defines how many pages prior
94 + to (for stacks growing down) resp. after (for stacks
95 + growing up) the main stack are reserved for no other
96 + mapping. Default value is 256 pages.
99 Enabled the stack tracer on boot up.
101 diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
102 index 2e06d56..cf4ae69 100644
103 --- a/arch/arc/mm/mmap.c
104 +++ b/arch/arc/mm/mmap.c
105 @@ -64,7 +64,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
107 vma = find_vma(mm, addr);
108 if (TASK_SIZE - len >= addr &&
109 - (!vma || addr + len <= vma->vm_start))
110 + (!vma || addr + len <= vm_start_gap(vma)))
114 diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
115 index 407dc78..c469c06 100644
116 --- a/arch/arm/mm/mmap.c
117 +++ b/arch/arm/mm/mmap.c
118 @@ -89,7 +89,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
120 vma = find_vma(mm, addr);
121 if (TASK_SIZE - len >= addr &&
122 - (!vma || addr + len <= vma->vm_start))
123 + (!vma || addr + len <= vm_start_gap(vma)))
127 @@ -140,7 +140,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
128 addr = PAGE_ALIGN(addr);
129 vma = find_vma(mm, addr);
130 if (TASK_SIZE - len >= addr &&
131 - (!vma || addr + len <= vma->vm_start))
132 + (!vma || addr + len <= vm_start_gap(vma)))
136 diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
137 index 836f147..efa59f1 100644
138 --- a/arch/frv/mm/elf-fdpic.c
139 +++ b/arch/frv/mm/elf-fdpic.c
140 @@ -74,7 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
141 addr = PAGE_ALIGN(addr);
142 vma = find_vma(current->mm, addr);
143 if (TASK_SIZE - len >= addr &&
144 - (!vma || addr + len <= vma->vm_start))
145 + (!vma || addr + len <= vm_start_gap(vma)))
149 diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
150 index 5c81fdd..025cb31 100644
151 --- a/arch/mips/mm/mmap.c
152 +++ b/arch/mips/mm/mmap.c
153 @@ -92,7 +92,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
155 vma = find_vma(mm, addr);
156 if (TASK_SIZE - len >= addr &&
157 - (!vma || addr + len <= vma->vm_start))
158 + (!vma || addr + len <= vm_start_gap(vma)))
162 diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
163 index 5aba01a..4dda73c 100644
164 --- a/arch/parisc/kernel/sys_parisc.c
165 +++ b/arch/parisc/kernel/sys_parisc.c
166 @@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
167 unsigned long len, unsigned long pgoff, unsigned long flags)
169 struct mm_struct *mm = current->mm;
170 - struct vm_area_struct *vma;
171 + struct vm_area_struct *vma, *prev;
172 unsigned long task_size = TASK_SIZE;
173 int do_color_align, last_mmap;
174 struct vm_unmapped_area_info info;
175 @@ -115,9 +115,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
177 addr = PAGE_ALIGN(addr);
179 - vma = find_vma(mm, addr);
180 + vma = find_vma_prev(mm, addr, &prev);
181 if (task_size - len >= addr &&
182 - (!vma || addr + len <= vma->vm_start))
183 + (!vma || addr + len <= vm_start_gap(vma)) &&
184 + (!prev || addr >= vm_end_gap(prev)))
188 @@ -141,7 +142,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
189 const unsigned long len, const unsigned long pgoff,
190 const unsigned long flags)
192 - struct vm_area_struct *vma;
193 + struct vm_area_struct *vma, *prev;
194 struct mm_struct *mm = current->mm;
195 unsigned long addr = addr0;
196 int do_color_align, last_mmap;
197 @@ -175,9 +176,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
198 addr = COLOR_ALIGN(addr, last_mmap, pgoff);
200 addr = PAGE_ALIGN(addr);
201 - vma = find_vma(mm, addr);
203 + vma = find_vma_prev(mm, addr, &prev);
204 if (TASK_SIZE - len >= addr &&
205 - (!vma || addr + len <= vma->vm_start))
206 + (!vma || addr + len <= vm_start_gap(vma)) &&
207 + (!prev || addr >= vm_end_gap(prev)))
211 diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
212 index 0f432a7..6ad12b2 100644
213 --- a/arch/powerpc/mm/slice.c
214 +++ b/arch/powerpc/mm/slice.c
215 @@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
216 if ((mm->task_size - len) < addr)
218 vma = find_vma(mm, addr);
219 - return (!vma || (addr + len) <= vma->vm_start);
220 + return (!vma || (addr + len) <= vm_start_gap(vma));
223 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
224 diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
225 index f2b6b1d..126c4a9 100644
226 --- a/arch/s390/mm/mmap.c
227 +++ b/arch/s390/mm/mmap.c
228 @@ -97,7 +97,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
229 addr = PAGE_ALIGN(addr);
230 vma = find_vma(mm, addr);
231 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
232 - (!vma || addr + len <= vma->vm_start))
233 + (!vma || addr + len <= vm_start_gap(vma)))
237 @@ -135,7 +135,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
238 addr = PAGE_ALIGN(addr);
239 vma = find_vma(mm, addr);
240 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
241 - (!vma || addr + len <= vma->vm_start))
242 + (!vma || addr + len <= vm_start_gap(vma)))
246 diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
247 index 6777177..7df7d59 100644
248 --- a/arch/sh/mm/mmap.c
249 +++ b/arch/sh/mm/mmap.c
250 @@ -63,7 +63,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
252 vma = find_vma(mm, addr);
253 if (TASK_SIZE - len >= addr &&
254 - (!vma || addr + len <= vma->vm_start))
255 + (!vma || addr + len <= vm_start_gap(vma)))
259 @@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
261 vma = find_vma(mm, addr);
262 if (TASK_SIZE - len >= addr &&
263 - (!vma || addr + len <= vma->vm_start))
264 + (!vma || addr + len <= vm_start_gap(vma)))
268 diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
269 index c690c8e..7f0f7c01 100644
270 --- a/arch/sparc/kernel/sys_sparc_64.c
271 +++ b/arch/sparc/kernel/sys_sparc_64.c
272 @@ -118,7 +118,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
274 vma = find_vma(mm, addr);
275 if (task_size - len >= addr &&
276 - (!vma || addr + len <= vma->vm_start))
277 + (!vma || addr + len <= vm_start_gap(vma)))
281 @@ -181,7 +181,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
283 vma = find_vma(mm, addr);
284 if (task_size - len >= addr &&
285 - (!vma || addr + len <= vma->vm_start))
286 + (!vma || addr + len <= vm_start_gap(vma)))
290 diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
291 index da11424..ffa842b 100644
292 --- a/arch/sparc/mm/hugetlbpage.c
293 +++ b/arch/sparc/mm/hugetlbpage.c
294 @@ -115,7 +115,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
295 addr = ALIGN(addr, HPAGE_SIZE);
296 vma = find_vma(mm, addr);
297 if (task_size - len >= addr &&
298 - (!vma || addr + len <= vma->vm_start))
299 + (!vma || addr + len <= vm_start_gap(vma)))
302 if (mm->get_unmapped_area == arch_get_unmapped_area)
303 diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
304 index c034dc3..c97ee6c 100644
305 --- a/arch/tile/mm/hugetlbpage.c
306 +++ b/arch/tile/mm/hugetlbpage.c
307 @@ -232,7 +232,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
308 addr = ALIGN(addr, huge_page_size(h));
309 vma = find_vma(mm, addr);
310 if (TASK_SIZE - len >= addr &&
311 - (!vma || addr + len <= vma->vm_start))
312 + (!vma || addr + len <= vm_start_gap(vma)))
315 if (current->mm->get_unmapped_area == arch_get_unmapped_area)
316 diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
317 index 10e0272..136ad7c 100644
318 --- a/arch/x86/kernel/sys_x86_64.c
319 +++ b/arch/x86/kernel/sys_x86_64.c
320 @@ -143,7 +143,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
321 addr = PAGE_ALIGN(addr);
322 vma = find_vma(mm, addr);
323 if (end - len >= addr &&
324 - (!vma || addr + len <= vma->vm_start))
325 + (!vma || addr + len <= vm_start_gap(vma)))
329 @@ -186,7 +186,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
330 addr = PAGE_ALIGN(addr);
331 vma = find_vma(mm, addr);
332 if (TASK_SIZE - len >= addr &&
333 - (!vma || addr + len <= vma->vm_start))
334 + (!vma || addr + len <= vm_start_gap(vma)))
338 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
339 index 42982b2..39bdaf3 100644
340 --- a/arch/x86/mm/hugetlbpage.c
341 +++ b/arch/x86/mm/hugetlbpage.c
342 @@ -144,7 +144,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
343 addr = ALIGN(addr, huge_page_size(h));
344 vma = find_vma(mm, addr);
345 if (TASK_SIZE - len >= addr &&
346 - (!vma || addr + len <= vma->vm_start))
347 + (!vma || addr + len <= vm_start_gap(vma)))
350 if (mm->get_unmapped_area == arch_get_unmapped_area)
351 diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
352 index 83cf496..3aaaae1 100644
353 --- a/arch/xtensa/kernel/syscall.c
354 +++ b/arch/xtensa/kernel/syscall.c
355 @@ -87,7 +87,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
356 /* At this point: (!vmm || addr < vmm->vm_end). */
357 if (TASK_SIZE - len < addr)
359 - if (!vmm || addr + len <= vmm->vm_start)
360 + if (!vmm || addr + len <= vm_start_gap(vmm))
363 if (flags & MAP_SHARED)
364 diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
365 index 595ebdb..a17da8b5 100644
366 --- a/fs/hugetlbfs/inode.c
367 +++ b/fs/hugetlbfs/inode.c
368 @@ -191,7 +191,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
369 addr = ALIGN(addr, huge_page_size(h));
370 vma = find_vma(mm, addr);
371 if (TASK_SIZE - len >= addr &&
372 - (!vma || addr + len <= vma->vm_start))
373 + (!vma || addr + len <= vm_start_gap(vma)))
377 diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
378 index d598b9c..cb7020c 100644
379 --- a/fs/proc/task_mmu.c
380 +++ b/fs/proc/task_mmu.c
381 @@ -295,11 +295,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
383 /* We don't show the stack guard page in /proc/maps */
384 start = vma->vm_start;
385 - if (stack_guard_page_start(vma, start))
386 - start += PAGE_SIZE;
387 end = vma->vm_end;
388 - if (stack_guard_page_end(vma, end))
389 - end -= PAGE_SIZE;
391 seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
392 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
393 diff --git a/include/linux/mm.h b/include/linux/mm.h
394 index f0ffa01..55f950a 100644
395 --- a/include/linux/mm.h
396 +++ b/include/linux/mm.h
397 @@ -1278,39 +1278,11 @@ int clear_page_dirty_for_io(struct page *page);
399 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
401 -/* Is the vma a continuation of the stack vma above it? */
402 -static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
404 - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
407 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
412 -static inline int stack_guard_page_start(struct vm_area_struct *vma,
413 - unsigned long addr)
415 - return (vma->vm_flags & VM_GROWSDOWN) &&
416 - (vma->vm_start == addr) &&
417 - !vma_growsdown(vma->vm_prev, addr);
420 -/* Is the vma a continuation of the stack vma below it? */
421 -static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
423 - return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
426 -static inline int stack_guard_page_end(struct vm_area_struct *vma,
427 - unsigned long addr)
429 - return (vma->vm_flags & VM_GROWSUP) &&
430 - (vma->vm_end == addr) &&
431 - !vma_growsup(vma->vm_next, addr);
434 int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
436 extern unsigned long move_page_tables(struct vm_area_struct *vma,
437 @@ -2012,6 +1984,7 @@ void page_cache_async_readahead(struct address_space *mapping,
441 +extern unsigned long stack_guard_gap;
442 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
443 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
445 @@ -2040,6 +2013,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
449 +static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
451 + unsigned long vm_start = vma->vm_start;
453 + if (vma->vm_flags & VM_GROWSDOWN) {
454 + vm_start -= stack_guard_gap;
455 + if (vm_start > vma->vm_start)
456 + vm_start = 0;
457 + }
458 + return vm_start;
459 +}
461 +static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
463 + unsigned long vm_end = vma->vm_end;
465 + if (vma->vm_flags & VM_GROWSUP) {
466 + vm_end += stack_guard_gap;
467 + if (vm_end < vma->vm_end)
468 + vm_end = -PAGE_SIZE;
469 + }
470 + return vm_end;
471 +}
473 static inline unsigned long vma_pages(struct vm_area_struct *vma)
475 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
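A note on the wraparound clamps in the two helpers just added: a
GROWSDOWN stack starting less than stack_guard_gap above address 0
reports a guarded start of 0 instead of an underflowed address (and
symmetrically, vm_end_gap() saturates near the top of the address
space). A standalone sketch with made-up addresses:

	#include <stdio.h>

	static unsigned long stack_guard_gap = 256UL << 12;	/* 1MB */

	/* Mirrors vm_start_gap()'s underflow clamp for a GROWSDOWN vma. */
	static unsigned long start_gap(unsigned long vm_start)
	{
		unsigned long start = vm_start - stack_guard_gap;

		if (start > vm_start)	/* wrapped below 0: clamp */
			start = 0;
		return start;
	}

	int main(void)
	{
		printf("%#lx\n", start_gap(0x80000));        /* 0 (clamped) */
		printf("%#lx\n", start_gap(0x7ffffff00000)); /* 0x7fffffe00000 */
		return 0;
	}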
476 diff --git a/mm/gup.c b/mm/gup.c
477 index 4b0b7e7..b599526 100644
478 --- a/mm/gup.c
479 +++ b/mm/gup.c
480 @@ -312,11 +312,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
481 /* mlock all present pages, but do not fault in new pages */
482 if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
483 return -ENOENT;
484 - /* For mm_populate(), just skip the stack guard page. */
485 - if ((*flags & FOLL_POPULATE) &&
486 - (stack_guard_page_start(vma, address) ||
487 - (stack_guard_page_end(vma, address + PAGE_SIZE)))
488 - return -ENOENT;
489 if (*flags & FOLL_WRITE)
490 fault_flags |= FAULT_FLAG_WRITE;
492 diff --git a/mm/memory.c b/mm/memory.c
493 index 76dcee3..e6fa134 100644
494 --- a/mm/memory.c
495 +++ b/mm/memory.c
496 @@ -2662,40 +2662,6 @@ out_release:
500 - * This is like a special single-page "expand_{down|up}wards()",
501 - * except we must first make sure that 'address{-|+}PAGE_SIZE'
502 - * doesn't hit another vma.
503 - */
504 -static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
505 -{
506 - address &= PAGE_MASK;
507 - if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
508 - struct vm_area_struct *prev = vma->vm_prev;
511 - * Is there a mapping abutting this one below?
513 - * That's only ok if it's the same stack mapping
514 - * that has gotten split..
516 - if (prev && prev->vm_end == address)
517 - return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
519 - return expand_downwards(vma, address - PAGE_SIZE);
520 - }
521 - if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
522 - struct vm_area_struct *next = vma->vm_next;
524 - /* As VM_GROWSDOWN but s/below/above/ */
525 - if (next && next->vm_start == address + PAGE_SIZE)
526 - return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
528 - return expand_upwards(vma, address + PAGE_SIZE);
529 - }
530 - return 0;
531 -}
534 * We enter with non-exclusive mmap_sem (to exclude vma changes,
535 * but allow concurrent faults), and pte mapped but not yet locked.
536 * We return with mmap_sem still held, but pte unmapped and unlocked.
537 @@ -2715,10 +2681,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
538 if (vma->vm_flags & VM_SHARED)
539 return VM_FAULT_SIGBUS;
541 - /* Check if we need to add a guard page to the stack */
542 - if (check_stack_guard_page(vma, address) < 0)
543 - return VM_FAULT_SIGSEGV;
545 /* Use the zero-page for reads */
546 if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
547 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
548 diff --git a/mm/mmap.c b/mm/mmap.c
549 index 455772a..5e043dd 100644
550 --- a/mm/mmap.c
551 +++ b/mm/mmap.c
552 @@ -288,6 +288,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
553 unsigned long retval;
554 unsigned long newbrk, oldbrk;
555 struct mm_struct *mm = current->mm;
556 + struct vm_area_struct *next;
557 unsigned long min_brk;
560 @@ -332,7 +333,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
563 /* Check against existing mmap mappings. */
564 - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
565 + next = find_vma(mm, oldbrk);
566 + if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
567 goto out;
569 /* Ok, looks good - let it rip. */
570 @@ -355,10 +357,22 @@ out:
572 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
574 - unsigned long max, subtree_gap;
575 - max = vma->vm_start;
576 - if (vma->vm_prev)
577 - max -= vma->vm_prev->vm_end;
578 + unsigned long max, prev_end, subtree_gap;
581 + * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
582 + * allow two stack_guard_gaps between them here, and when choosing
583 + * an unmapped area; whereas when expanding we only require one.
584 + * That's a little inconsistent, but keeps the code here simpler.
585 + */
586 + max = vm_start_gap(vma);
587 + if (vma->vm_prev) {
588 + prev_end = vm_end_gap(vma->vm_prev);
589 + if (max > prev_end)
590 + max -= prev_end;
591 + else
592 + max = 0;
593 + }
594 if (vma->vm_rb.rb_left) {
595 subtree_gap = rb_entry(vma->vm_rb.rb_left,
596 struct vm_area_struct, vm_rb)->rb_subtree_gap;
597 @@ -451,7 +465,7 @@ static void validate_mm(struct mm_struct *mm)
598 anon_vma_unlock_read(anon_vma);
601 - highest_address = vma->vm_end;
602 + highest_address = vm_end_gap(vma);
606 @@ -620,7 +634,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
608 vma_gap_update(vma->vm_next);
610 - mm->highest_vm_end = vma->vm_end;
611 + mm->highest_vm_end = vm_end_gap(vma);
614 * vma->vm_prev wasn't known when we followed the rbtree to find the
615 @@ -866,7 +880,7 @@ again: remove_next = 1 + (end > next->vm_end);
619 - mm->highest_vm_end = end;
620 + mm->highest_vm_end = vm_end_gap(vma);
621 else if (!adjust_next)
622 vma_gap_update(next);
624 @@ -909,7 +923,7 @@ again: remove_next = 1 + (end > next->vm_end);
626 vma_gap_update(next);
628 - mm->highest_vm_end = end;
629 + VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
633 @@ -1741,7 +1755,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
636 /* Visit left subtree if it looks promising */
637 - gap_end = vma->vm_start;
638 + gap_end = vm_start_gap(vma);
639 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
640 struct vm_area_struct *left =
641 rb_entry(vma->vm_rb.rb_left,
642 @@ -1752,7 +1766,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
646 - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
647 + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
649 /* Check if current node has a suitable gap */
650 if (gap_start > high_limit)
651 @@ -1779,8 +1793,8 @@ check_current:
652 vma = rb_entry(rb_parent(prev),
653 struct vm_area_struct, vm_rb);
654 if (prev == vma->vm_rb.rb_left) {
655 - gap_start = vma->vm_prev->vm_end;
656 - gap_end = vma->vm_start;
657 + gap_start = vm_end_gap(vma->vm_prev);
658 + gap_end = vm_start_gap(vma);
662 @@ -1844,7 +1858,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
665 /* Visit right subtree if it looks promising */
666 - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
667 + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
668 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
669 struct vm_area_struct *right =
670 rb_entry(vma->vm_rb.rb_right,
671 @@ -1857,7 +1871,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
674 /* Check if current node has a suitable gap */
675 - gap_end = vma->vm_start;
676 + gap_end = vm_start_gap(vma);
677 if (gap_end < low_limit)
679 if (gap_start <= high_limit && gap_end - gap_start >= length)
680 @@ -1883,7 +1897,7 @@ check_current:
681 struct vm_area_struct, vm_rb);
682 if (prev == vma->vm_rb.rb_right) {
683 gap_start = vma->vm_prev ?
684 - vma->vm_prev->vm_end : 0;
685 + vm_end_gap(vma->vm_prev) : 0;
689 @@ -1921,7 +1935,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
690 unsigned long len, unsigned long pgoff, unsigned long flags)
692 struct mm_struct *mm = current->mm;
693 - struct vm_area_struct *vma;
694 + struct vm_area_struct *vma, *prev;
695 struct vm_unmapped_area_info info;
697 if (len > TASK_SIZE - mmap_min_addr)
698 @@ -1932,9 +1946,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
701 addr = PAGE_ALIGN(addr);
702 - vma = find_vma(mm, addr);
703 + vma = find_vma_prev(mm, addr, &prev);
704 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
705 - (!vma || addr + len <= vma->vm_start))
706 + (!vma || addr + len <= vm_start_gap(vma)) &&
707 + (!prev || addr >= vm_end_gap(prev)))
711 @@ -1957,7 +1972,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
712 const unsigned long len, const unsigned long pgoff,
713 const unsigned long flags)
715 - struct vm_area_struct *vma;
716 + struct vm_area_struct *vma, *prev;
717 struct mm_struct *mm = current->mm;
718 unsigned long addr = addr0;
719 struct vm_unmapped_area_info info;
720 @@ -1972,9 +1987,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
721 /* requesting a specific address */
723 addr = PAGE_ALIGN(addr);
724 - vma = find_vma(mm, addr);
725 + vma = find_vma_prev(mm, addr, &prev);
726 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
727 - (!vma || addr + len <= vma->vm_start))
728 + (!vma || addr + len <= vm_start_gap(vma)) &&
729 + (!prev || addr >= vm_end_gap(prev)))
733 @@ -2099,21 +2115,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
734 * update accounting. This is shared with both the
735 * grow-up and grow-down cases.
737 -static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
738 +static int acct_stack_growth(struct vm_area_struct *vma,
739 + unsigned long size, unsigned long grow)
741 struct mm_struct *mm = vma->vm_mm;
742 struct rlimit *rlim = current->signal->rlim;
743 - unsigned long new_start, actual_size;
744 + unsigned long new_start;
746 /* address space limit tests */
747 if (!may_expand_vm(mm, grow))
750 /* Stack limit test */
751 - actual_size = size;
752 - if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
753 - actual_size -= PAGE_SIZE;
754 - if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
755 + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
758 /* mlock limit tests */
759 @@ -2151,17 +2165,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
760 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
762 struct mm_struct *mm = vma->vm_mm;
763 + struct vm_area_struct *next;
764 + unsigned long gap_addr;
767 if (!(vma->vm_flags & VM_GROWSUP))
770 /* Guard against wrapping around to address 0. */
771 - if (address < PAGE_ALIGN(address+4))
772 - address = PAGE_ALIGN(address+4);
773 - else
774 + address &= PAGE_MASK;
775 + address += PAGE_SIZE;
776 + if (!address)
777 return -ENOMEM;
779 + /* Enforce stack_guard_gap */
780 + gap_addr = address + stack_guard_gap;
781 + if (gap_addr < address)
782 + return -ENOMEM;
783 + next = vma->vm_next;
784 + if (next && next->vm_start < gap_addr) {
785 + if (!(next->vm_flags & VM_GROWSUP))
786 + return -ENOMEM;
787 + /* Check that both stack segments have the same anon_vma? */
788 + }
790 /* We must make sure the anon_vma is allocated. */
791 if (unlikely(anon_vma_prepare(vma)))
793 @@ -2206,7 +2233,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
795 vma_gap_update(vma->vm_next);
797 - mm->highest_vm_end = address;
798 + mm->highest_vm_end = vm_end_gap(vma);
799 spin_unlock(&mm->page_table_lock);
801 perf_event_mmap(vma);
802 @@ -2227,6 +2254,8 @@ int expand_downwards(struct vm_area_struct *vma,
803 unsigned long address)
805 struct mm_struct *mm = vma->vm_mm;
806 + struct vm_area_struct *prev;
807 + unsigned long gap_addr;
810 address &= PAGE_MASK;
811 @@ -2234,6 +2263,17 @@ int expand_downwards(struct vm_area_struct *vma,
815 + /* Enforce stack_guard_gap */
816 + gap_addr = address - stack_guard_gap;
817 + if (gap_addr > address)
818 + return -ENOMEM;
819 + prev = vma->vm_prev;
820 + if (prev && prev->vm_end > gap_addr) {
821 + if (!(prev->vm_flags & VM_GROWSDOWN))
822 + return -ENOMEM;
823 + /* Check that both stack segments have the same anon_vma? */
824 + }
826 /* We must make sure the anon_vma is allocated. */
827 if (unlikely(anon_vma_prepare(vma)))
829 @@ -2289,28 +2329,25 @@ int expand_downwards(struct vm_area_struct *vma,
834 - * Note how expand_stack() refuses to expand the stack all the way to
835 - * abut the next virtual mapping, *unless* that mapping itself is also
836 - * a stack mapping. We want to leave room for a guard page, after all
837 - * (the guard page itself is not added here, that is done by the
838 - * actual page faulting logic)
840 - * This matches the behavior of the guard page logic (see mm/memory.c:
841 - * check_stack_guard_page()), which only allows the guard page to be
842 - * removed under these circumstances.
844 +/* enforced gap between the expanding stack and other mappings. */
845 +unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
847 +static int __init cmdline_parse_stack_guard_gap(char *p)
848 +{
849 + unsigned long val;
850 + char *endptr;
852 + val = simple_strtoul(p, &endptr, 10);
853 + if (!*endptr)
854 + stack_guard_gap = val << PAGE_SHIFT;
856 + return 0;
857 +}
858 +__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
860 #ifdef CONFIG_STACK_GROWSUP
861 int expand_stack(struct vm_area_struct *vma, unsigned long address)
863 - struct vm_area_struct *next;
865 - address &= PAGE_MASK;
866 - next = vma->vm_next;
867 - if (next && next->vm_start == address + PAGE_SIZE) {
868 - if (!(next->vm_flags & VM_GROWSUP))
869 - return -ENOMEM;
870 - }
871 return expand_upwards(vma, address);
874 @@ -2332,14 +2369,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
876 int expand_stack(struct vm_area_struct *vma, unsigned long address)
878 - struct vm_area_struct *prev;
880 - address &= PAGE_MASK;
881 - prev = vma->vm_prev;
882 - if (prev && prev->vm_end == address) {
883 - if (!(prev->vm_flags & VM_GROWSDOWN))
884 - return -ENOMEM;
885 - }
886 return expand_downwards(vma, address);
889 @@ -2437,7 +2466,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
893 - mm->highest_vm_end = prev ? prev->vm_end : 0;
894 + mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
895 tail_vma->vm_next = NULL;