Newer
Older
// SPDX-License-Identifier: GPL-2.0-only
/* Address space accounting code <alan@lxorguk.ukuu.org.uk> */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mm_inline.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/perf_event.h>
Srikar Dronamraju
committed
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
Andrea Arcangeli
committed
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
Jeremy Fitzhardinge
committed
#include <asm/mmu_context.h>
#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>
#include "internal.h"
#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags) (0)
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif
static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
/*
 * Description of the effects of mapping type and prot in the current
 * implementation. This is due to the limited x86 page protection hardware.
 * The expected behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
 * MAP_PRIVATE (with Enhanced PAN supported):
 *								r: (no) no
 *								w: (no) no
 *								x: (yes) yes
 */
pgprot_t protection_map[16] __ro_after_init = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
/*
 * Compiled only when CONFIG_ARCH_HAS_FILTER_PGPROT is not defined: the
 * protection value handed in is returned unchanged.
 */
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif
/*
 * Build the page protection for a set of vm_flags.
 *
 * The read/write/exec/shared bits of @vm_flags select an entry of
 * protection_map[]; that entry is OR-ed with whatever
 * arch_vm_get_page_prot() contributes for the same flags, and the combined
 * value is finally passed through arch_filter_pgprot().
 */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	const unsigned long access_bits =
		VM_READ | VM_WRITE | VM_EXEC | VM_SHARED;
	pgprot_t base_prot = protection_map[vm_flags & access_bits];
	pgprot_t arch_prot = arch_vm_get_page_prot(vm_flags);
	pgprot_t combined;

	combined = __pgprot(pgprot_val(base_prot) | pgprot_val(arch_prot));

	return arch_filter_pgprot(combined);
}
EXPORT_SYMBOL(vm_get_page_prot);
/*
 * Apply the protection derived from @vm_flags on top of @oldprot by way of
 * pgprot_modify().
 */
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	pgprot_t wanted = vm_get_page_prot(vm_flags);

	return pgprot_modify(oldprot, wanted);
}
/*
 * Recompute vma->vm_page_prot from vma->vm_flags.
 *
 * If vma_wants_writenotify() says so for the freshly computed protection,
 * the protection is recomputed once more with VM_SHARED masked out of the
 * (local copy of the) flags. vma->vm_flags itself is not modified.
 */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long flags = vma->vm_flags;
	pgprot_t prot = vm_pgprot_modify(vma->vm_page_prot, flags);

	if (vma_wants_writenotify(vma, prot)) {
		flags &= ~VM_SHARED;
		prot = vm_pgprot_modify(prot, flags);
	}

	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, prot);
}
* Requires inode->i_mapping->i_mmap_rwsem
*/
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
struct file *file, struct address_space *mapping)
{
if (vma->vm_flags & VM_SHARED)
mapping_unmap_writable(mapping);
vma_interval_tree_remove(vma, &mapping->i_mmap);
* Unlink a file-based vm structure from its interval tree, to hide
* vma from rmap and vmtruncate before freeing its page tables.
void unlink_file_vma(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;
i_mmap_lock_write(mapping);
i_mmap_unlock_write(mapping);
}
/*
* Close a vm structure and free it, returning the next.
*/
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
struct vm_area_struct *next = vma->vm_next;
might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
if (vma->vm_file)
vm_area_free(vma);
static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
unsigned long newbrk, oldbrk, origbrk;
unsigned long min_brk;
bool populate;
LIST_HEAD(uf);
if (mmap_write_lock_killable(mm))
return -EINTR;
#ifdef CONFIG_COMPAT_BRK
/*
* CONFIG_COMPAT_BRK can still be overridden by setting
* randomize_va_space to 2, which will still cause mm->start_brk
* to be arbitrarily shifted
*/
if (current->brk_randomized)
min_brk = mm->start_brk;
else
min_brk = mm->end_data;
#else
min_brk = mm->start_brk;
#endif
if (brk < min_brk)
/*
* Check against rlimit here. If this check is done later after the test
* of oldbrk with newbrk then it can escape the test and let the data
* segment grow beyond its set limit the in case where the limit is
* not page aligned -Ram Gupta
*/
if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
mm->end_data, mm->start_data))
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
if (oldbrk == newbrk) {
mm->brk = brk;
goto success;
}
/*
* Always allow shrinking brk.
* __do_munmap() may downgrade mmap_lock to read.
* mm->brk must to be protected by write mmap_lock so update it
* before downgrading mmap_lock. When __do_munmap() fails,
* mm->brk will be restored from origbrk.
*/
mm->brk = brk;
ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
if (ret < 0) {
mm->brk = origbrk;
goto out;
} else if (ret == 1) {
downgraded = true;
}
goto success;
next = find_vma(mm, oldbrk);
if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
mmap_read_unlock(mm);
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
return brk;
mmap_write_unlock(mm);
static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
unsigned long gap, prev_end;
/*
* Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
* allow two stack_guard_gaps between them here, and when choosing
* an unmapped area; whereas when expanding we only require one.
* That's a little inconsistent, but keeps the code here simpler.
*/
gap = vm_start_gap(vma);
if (vma->vm_prev) {
prev_end = vm_end_gap(vma->vm_prev);
if (gap > prev_end)
gap -= prev_end;
return gap;
}
#ifdef CONFIG_DEBUG_VM_RB
/*
 * Return the largest of: this vma's own gap (vma_compute_gap()) and the
 * cached rb_subtree_gap of its left and right rbtree children, when present.
 */
static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	struct rb_node *children[2] = {
		vma->vm_rb.rb_left,
		vma->vm_rb.rb_right,
	};
	unsigned long widest = vma_compute_gap(vma);
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long child_gap;

		if (!children[i])
			continue;

		child_gap = rb_entry(children[i],
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (child_gap > widest)
			widest = child_gap;
	}

	return widest;
}
static int browse_rb(struct mm_struct *mm)
struct rb_root *root = &mm->mm_rb;
struct rb_node *nd, *pn = NULL;
unsigned long prev = 0, pend = 0;
for (nd = rb_first(root); nd; nd = rb_next(nd)) {
struct vm_area_struct *vma;
vma = rb_entry(nd, struct vm_area_struct, vm_rb);
pr_emerg("vm_start %lx < prev %lx\n",
vma->vm_start, prev);
bug = 1;
}
if (vma->vm_start < pend) {
pr_emerg("vm_start %lx < pend %lx\n",
vma->vm_start, pend);
bug = 1;
}
if (vma->vm_start > vma->vm_end) {
pr_emerg("vm_start %lx > vm_end %lx\n",
vma->vm_start, vma->vm_end);
spin_lock(&mm->page_table_lock);
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
pr_emerg("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
}
spin_unlock(&mm->page_table_lock);
prev = vma->vm_start;
pend = vma->vm_end;
for (nd = pn; nd; nd = rb_prev(nd))
pr_emerg("backwards %d, forwards %d\n", j, i);
/*
 * Debug check: walk every node of @root in order and assert that its cached
 * rb_subtree_gap matches the value recomputed by vma_compute_subtree_gap().
 * The vma passed as @ignore (may be NULL) is exempt from the check.
 */
static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;

		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}
static void validate_mm(struct mm_struct *mm)
unsigned long highest_address = 0;
struct vm_area_struct *vma = mm->mmap;
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma) {
anon_vma_lock_read(anon_vma);
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_verify(avc);
anon_vma_unlock_read(anon_vma);
}
highest_address = vm_end_gap(vma);
pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
bug = 1;
}
if (highest_address != mm->highest_vm_end) {
pr_emerg("mm->highest_vm_end %lx, found %lx\n",
mm->highest_vm_end, highest_address);
if (i != -1)
pr_emerg("map_count %d rb %d\n", mm->map_count, i);
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif
RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
struct vm_area_struct, vm_rb,
unsigned long, rb_subtree_gap, vma_compute_gap)
/*
 * Refresh the augmented rb_subtree_gap values for @vma after vma->vm_start
 * or vma->vm_prev->vm_end changed. The vma keeps its position in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * The propagate callback generated by RB_DECLARE_CALLBACKS_MAX()
	 * already does exactly this job, so simply reuse it.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}
/*
 * Insert @vma's rb node into @root using the augmented gap callbacks.
 */
static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* Every rb_subtree_gap must already be consistent before inserting. */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}
/*
 * Erase @vma's rb node from @root using the augmented gap callbacks.
 */
static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * rb_erase_augmented() is a fairly large inline function; keeping the
	 * call in this one out-of-line helper means it is instantiated only
	 * once with our augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}
/*
 * Erase @vma from @root, first validating the tree while exempting @ignore
 * from the consistency check.
 */
static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * Before an erase every rb_subtree_gap value has to be consistent,
	 * except possibly for:
	 *
	 * a. the "next" vma being erased if next->vm_start was reduced in
	 *    __vma_adjust() -> __vma_unlink()
	 * b. the vma being erased in detach_vmas_to_be_unmapped() ->
	 *    vma_rb_erase()
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}
/*
 * Erase @vma from @root; the vma being erased is itself the one exempted
 * from the pre-erase consistency check.
 */
static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	validate_mm_rb(root, vma);
	__vma_rb_erase(vma, root);
}
/*
* vma has some anon_vma assigned, and is already inserted on that
* anon_vma's interval trees.
*
* Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
* vma must be removed from the anon_vma's interval trees using
* anon_vma_interval_tree_pre_update_vma().
*
* After the update, the vma will be reinserted using
* anon_vma_interval_tree_post_update_vma().
*
* The entire update must be protected by exclusive mmap_lock and by
* the root anon_vma's mutex.
*/
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc;
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}
static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc;
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
unsigned long end, struct vm_area_struct **pprev,
struct rb_node ***rb_link, struct rb_node **rb_parent)
struct rb_node **__rb_link, *__rb_parent, *rb_prev;
mmap_assert_locked(mm);
__rb_link = &mm->mm_rb.rb_node;
rb_prev = __rb_parent = NULL;
while (*__rb_link) {
struct vm_area_struct *vma_tmp;
__rb_parent = *__rb_link;
vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
if (vma_tmp->vm_end > addr) {
/* Fail if an existing vma overlaps the area */
if (vma_tmp->vm_start < end)
return -ENOMEM;
__rb_link = &__rb_parent->rb_left;
} else {
rb_prev = __rb_parent;
__rb_link = &__rb_parent->rb_right;
}
}
*pprev = NULL;
if (rb_prev)
*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
*rb_link = __rb_link;
*rb_parent = __rb_parent;
return 0;
/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma, or NULL.
 *
 * A NULL @vma yields the first vma of @mm (mm->mmap).
 *
 * Returns: The VMA following @vma.
 */
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
					 struct vm_area_struct *vma)
{
	return vma ? vma->vm_next : mm->mmap;
}
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer to the pointer that will be set to previous vm_area_struct
 * @link: passed through to find_vma_links() as its rb_link output
 * @parent: passed through to find_vma_links() as its rb_parent output
 * @uf: list head handed to do_munmap()
 *
 * Keep calling do_munmap() on [@start, @start + @len) until
 * find_vma_links() succeeds for that range; @pprev, @link and @parent are
 * then the values written by that successful call.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
static inline int
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
		 struct vm_area_struct **pprev, struct rb_node ***link,
		 struct rb_node **parent, struct list_head *uf)
{
	for (;;) {
		if (!find_vma_links(mm, start, start + len, pprev, link, parent))
			return 0;

		if (do_munmap(mm, start, len, uf))
			return -ENOMEM;
	}
}
/*
 * Sum, in pages, how much of [addr, end) overlaps existing vmas of @mm.
 * Returns 0 when find_vma_intersection() finds no vma for the range.
 */
static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	struct vm_area_struct *cur = find_vma_intersection(mm, addr, end);
	unsigned long total;

	if (!cur)
		return 0;

	/* The first vma may begin before addr, so clamp both ends. */
	total = (min(end, cur->vm_end) - max(addr, cur->vm_start)) >> PAGE_SHIFT;

	/* Later vmas start above addr; only the upper end needs clamping. */
	cur = cur->vm_next;
	while (cur && cur->vm_start <= end) {
		total += (min(end, cur->vm_end) - cur->vm_start) >> PAGE_SHIFT;
		cur = cur->vm_next;
	}

	return total;
}
/*
 * Link @vma into mm->mm_rb at the slot described by @rb_link / @rb_parent
 * (as produced by find_vma_links()) and bring the gap tracking up to date.
 *
 * vma->vm_next is read here, so the vma must already be on the mm's vma
 * list when this is called.
 */
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree to find the
	 * correct insertion point for that vma. As a result, we could not
	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
	 * So, we first insert the vma with a zero rb_subtree_gap value
	 * (to be consistent with what we did on the way down), and then
	 * immediately update the gap to the correct value. Finally we
	 * rebalance the rbtree after all augmented values have been set.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}
static void __vma_link_file(struct vm_area_struct *vma)
file = vma->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;
if (vma->vm_flags & VM_SHARED)
mapping_allow_writable(mapping);
vma_interval_tree_insert(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
}
}
/*
 * Link @vma into @mm: first onto the vma list after @prev, then into the
 * rbtree at the slot given by @rb_link / @rb_parent.
 */
static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	   struct vm_area_struct *prev, struct rb_node **rb_link,
	   struct rb_node *rb_parent)
{
	/* List first, then the rbtree. */
	__vma_link_list(mm, vma, prev);

	__vma_link_rb(mm, vma, rb_link, rb_parent);
}
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node **rb_link,
struct rb_node *rb_parent)
{
struct address_space *mapping = NULL;
i_mmap_lock_write(mapping);
__vma_link(mm, vma, prev, rb_link, rb_parent);
__vma_link_file(vma);
if (mapping)
i_mmap_unlock_write(mapping);
mm->map_count++;
validate_mm(mm);
}
/*
* Helper for vma_adjust() in the split_vma insert case: insert a vma into the
* mm's list and rbtree. It has already been inserted into the interval tree.
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
struct vm_area_struct *prev;
if (find_vma_links(mm, vma->vm_start, vma->vm_end,
&prev, &rb_link, &rb_parent))
BUG();
__vma_link(mm, vma, prev, rb_link, rb_parent);
mm->map_count++;
}
static __always_inline void __vma_unlink(struct mm_struct *mm,
struct vm_area_struct *vma,
struct vm_area_struct *ignore)
vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
__vma_unlink_list(mm, vma);
/* Kill the cache */
vmacache_invalidate(mm);
}
/*
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
* is already present in an i_mmap tree without adjusting the tree.
* The following helper function should be used when such adjustments
* are necessary. The "insert" vma (if any) is to be inserted
* before we drop the necessary locks.
*/
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
struct vm_area_struct *expand)
struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
struct rb_root_cached *root = NULL;
struct anon_vma *anon_vma = NULL;
bool start_changed = false, end_changed = false;
long adjust_next = 0;
int remove_next = 0;
if (next && !insert) {
struct vm_area_struct *exporter = NULL, *importer = NULL;
if (end >= next->vm_end) {
/*
* vma expands, overlapping all the next, and
* perhaps the one after too (mprotect case 6).
* The only other cases that gets here are
* case 1, case 7 and case 8.
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
if (next == expand) {
/*
* The only case where we don't expand "vma"
* and we expand "next" instead is case 8.
*/
VM_WARN_ON(end != next->vm_end);
/*
* remove_next == 3 means we're
* removing "vma" and that to do so we
* swapped "vma" and "next".
*/
remove_next = 3;
VM_WARN_ON(file != next->vm_file);
swap(vma, next);
} else {
VM_WARN_ON(expand != vma);
/*
* case 1, 6, 7, remove_next == 2 is case 6,
* remove_next == 1 is case 1 or 7.
*/
remove_next = 1 + (end > next->vm_end);
VM_WARN_ON(remove_next == 2 &&
end != next->vm_next->vm_end);
/* trim end to next, for case 6 first pass */
end = next->vm_end;
}
/*
* If next doesn't have anon_vma, import from vma after
* next, if the vma overlaps with it.
*/
if (remove_next == 2 && !next->anon_vma)
exporter = next->vm_next;
} else if (end > next->vm_start) {
/*
* vma expands, overlapping part of the next:
* mprotect case 5 shifting the boundary up.
*/
adjust_next = (end - next->vm_start);
VM_WARN_ON(expand != importer);
} else if (end < vma->vm_end) {
/*
* vma shrinks, and !insert tells it's not
* split_vma inserting another: so it must be
* mprotect case 4 shifting the boundary down.
*/
adjust_next = -(vma->vm_end - end);
VM_WARN_ON(expand != importer);
/*
* Easily overlooked: when mprotect shifts the boundary,
* make sure the expanding vma has anon_vma set if the
* shrinking vma had, to cover any anon pages imported.
*/
if (exporter && exporter->anon_vma && !importer->anon_vma) {
importer->anon_vma = exporter->anon_vma;
error = anon_vma_clone(importer, exporter);
if (error)
}
}
again:
vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
root = &mapping->i_mmap;
uprobe_munmap(vma, vma->vm_start, vma->vm_end);
if (adjust_next)
uprobe_munmap(next, next->vm_start, next->vm_end);
i_mmap_lock_write(mapping);
* Put into interval tree now, so instantiated pages
* are visible to arm/parisc __flush_dcache_page
* throughout; but we cannot insert into address
* space until vma start or end is updated.
*/
__vma_link_file(insert);
}
}
anon_vma = vma->anon_vma;
if (!anon_vma && adjust_next)
anon_vma = next->anon_vma;
if (anon_vma) {
VM_WARN_ON(adjust_next && next->anon_vma &&
anon_vma != next->anon_vma);
Ingo Molnar
committed
anon_vma_lock_write(anon_vma);
anon_vma_interval_tree_pre_update_vma(vma);
if (adjust_next)
anon_vma_interval_tree_pre_update_vma(next);
}
if (file) {
vma_interval_tree_remove(vma, root);
vma_interval_tree_remove(next, root);
if (start != vma->vm_start) {
vma->vm_start = start;
start_changed = true;
}
if (end != vma->vm_end) {
vma->vm_end = end;
end_changed = true;
}
next->vm_start += adjust_next;
next->vm_pgoff += adjust_next >> PAGE_SHIFT;
if (file) {
vma_interval_tree_insert(next, root);
vma_interval_tree_insert(vma, root);
flush_dcache_mmap_unlock(mapping);
}
if (remove_next) {
/*
* vma_merge has merged next into vma, and needs
* us to remove next before dropping the locks.
*/
if (remove_next != 3)
__vma_unlink(mm, next, next);
else
/*
* vma is not before next if they've been
* swapped.
*
* pre-swap() next->vm_start was reduced so
* tell validate_mm_rb to ignore pre-swap()
* "next" (which is stored in post-swap()
* "vma").
*/
__vma_unlink(mm, next, vma);
if (file)
__remove_shared_vm_struct(next, file, mapping);
} else if (insert) {
/*
* split_vma has split insert from vma, and needs
* us to insert it before dropping the locks
* (it may either follow vma or precede it).
*/
__insert_vm_struct(mm, insert);
} else {
if (start_changed)
vma_gap_update(vma);
if (end_changed) {
if (!next)
mm->highest_vm_end = vm_end_gap(vma);
else if (!adjust_next)
vma_gap_update(next);
}
if (anon_vma) {
anon_vma_interval_tree_post_update_vma(vma);
if (adjust_next)
anon_vma_interval_tree_post_update_vma(next);
anon_vma_unlock_write(anon_vma);
}
if (file) {
i_mmap_unlock_write(mapping);
Srikar Dronamraju
committed
if (adjust_next)
Srikar Dronamraju
committed
}
uprobe_munmap(next, next->vm_start, next->vm_end);
if (next->anon_vma)
anon_vma_merge(vma, next);
mpol_put(vma_policy(next));
vm_area_free(next);
/*
* In mprotect's case 6 (see comments on vma_merge),
* we must remove another next too. It would clutter
* up the code too much to do both in one go.
*/
if (remove_next != 3) {
/*
* If "next" was removed and vma->vm_end was
* expanded (up) over it, in turn
* "next->vm_prev->vm_end" changed and the
* "vma->vm_next" gap must be updated.
*/
next = vma->vm_next;
} else {
/*
* For the scope of the comment "next" and
* "vma" considered pre-swap(): if "vma" was
* removed, next->vm_start was expanded (down)
* over it and the "next" gap must be updated.
* Because of the swap() the post-swap() "vma"
* actually points to pre-swap() "next"
* (post-swap() "next" as opposed is now a
* dangling pointer).
*/
next = vma;
}
if (remove_next == 2) {
remove_next = 1;
end = next->vm_end;
}
else if (next)
vma_gap_update(next);
Andrea Arcangeli
committed
else {
/*
* If remove_next == 2 we obviously can't
* reach this path.
*