Newer
Older
* Memory Migration functionality - linux/mm/migrate.c
*
* Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
*
* Page migration was first developed in the context of the memory hotplug
* project. The main authors of the migration code are:
*
* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
* Hirokazu Takahashi <taka@valinux.co.jp>
* Dave Hansen <haveblue@us.ibm.com>
#include <linux/export.h>
#include <linux/swapops.h>
#include <linux/nsproxy.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
Christoph Lameter
committed
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
Aneesh Kumar K.V
committed
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_owner.h>
Ingo Molnar
committed
#include <linux/sched/mm.h>
#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
Christoph Lameter
committed
* migrate_prep() needs to be called before we start compiling a list of pages
* to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
* undesirable, use migrate_prep_local()
*/
int migrate_prep(void)
{
/*
* Clear the LRU lists so pages can be isolated.
* Note that pages may be moved off the LRU after we have
* drained them. Those pages will fail to migrate like other
* pages that may be busy.
*/
lru_add_drain_all();
return 0;
}
/* Do the necessary work of migrate_prep but not if it involves other CPUs */
int migrate_prep_local(void)
{
lru_add_drain();
return 0;
}
int isolate_movable_page(struct page *page, isolate_mode_t mode)
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
{
struct address_space *mapping;
/*
* Avoid burning cycles with pages that are yet under __free_pages(),
* or just got freed under us.
*
* In case we 'win' a race for a movable page being freed under us and
* raise its refcount preventing __free_pages() from doing its job
* the put_page() at the end of this block will take care of
* release this page, thus avoiding a nasty leakage.
*/
if (unlikely(!get_page_unless_zero(page)))
goto out;
/*
* Check PageMovable before holding a PG_lock because page's owner
* assumes anybody doesn't touch PG_lock of newly allocated page
* so unconditionally grapping the lock ruins page's owner side.
*/
if (unlikely(!__PageMovable(page)))
goto out_putpage;
/*
* As movable pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
* as well as race against the releasing a page.
*
* In order to avoid having an already isolated movable page
* being (wrongly) re-isolated while it is under migration,
* or to avoid attempting to isolate pages being released,
* lets be sure we have the page lock
* before proceeding with the movable page isolation steps.
*/
if (unlikely(!trylock_page(page)))
goto out_putpage;
if (!PageMovable(page) || PageIsolated(page))
goto out_no_isolated;
mapping = page_mapping(page);
VM_BUG_ON_PAGE(!mapping, page);
if (!mapping->a_ops->isolate_page(page, mode))
goto out_no_isolated;
/* Driver shouldn't use PG_isolated bit of page->flags */
WARN_ON_ONCE(PageIsolated(page));
__SetPageIsolated(page);
unlock_page(page);
out_no_isolated:
unlock_page(page);
out_putpage:
put_page(page);
out:
}
/* It should be called on page which is PG_movable */
void putback_movable_page(struct page *page)
{
struct address_space *mapping;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
mapping = page_mapping(page);
mapping->a_ops->putback_page(page);
__ClearPageIsolated(page);
}
/*
* Put previously isolated pages back onto the appropriate lists
* from where they were once taken off for compaction/migration.
*
* This function shall be used whenever the isolated pageset has been
* built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
* and isolate_huge_page().
*/
void putback_movable_pages(struct list_head *l)
{
struct page *page;
struct page *page2;
list_for_each_entry_safe(page, page2, l, lru) {
if (unlikely(PageHuge(page))) {
putback_active_hugepage(page);
continue;
}
list_del(&page->lru);
/*
* We isolated non-lru movable page so here we can use
* __PageMovable because LRU page's mapping cannot have
* PAGE_MAPPING_MOVABLE.
*/
if (unlikely(__PageMovable(page))) {
VM_BUG_ON_PAGE(!PageIsolated(page), page);
lock_page(page);
if (PageMovable(page))
putback_movable_page(page);
else
__ClearPageIsolated(page);
unlock_page(page);
put_page(page);
} else {
putback_lru_page(page);
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
/*
* Restore a potential migration pte to a working pte entry
*/
static int remove_migration_pte(struct page *page, struct vm_area_struct *vma,
struct page_vma_mapped_walk pvmw = {
.page = old,
.vma = vma,
.address = addr,
.flags = PVMW_SYNC | PVMW_MIGRATION,
};
struct page *new;
pte_t pte;
swp_entry_t entry;
VM_BUG_ON_PAGE(PageTail(page), page);
while (page_vma_mapped_walk(&pvmw)) {
if (PageKsm(page))
new = page;
else
new = page - pvmw.page->index +
linear_page_index(vma, pvmw.address);
get_page(new);
pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
if (pte_swp_soft_dirty(*pvmw.pte))
pte = pte_mksoft_dirty(pte);
/*
* Recheck VMA as permissions can change since migration started
*/
entry = pte_to_swp_entry(*pvmw.pte);
if (is_write_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);
if (PageHuge(new)) {
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, vma, new, 0);
}
flush_dcache_page(new);
set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageHuge(new)) {
if (PageAnon(new))
hugepage_add_anon_rmap(new, vma, pvmw.address);
else
page_dup_rmap(new, true);
} else if (PageAnon(new))
page_add_anon_rmap(new, vma, pvmw.address, false);
page_add_file_rmap(new, false);
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
}
/*
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
.arg = old,
};
if (locked)
rmap_walk_locked(new, &rwc);
else
rmap_walk(new, &rwc);
}
/*
* Something used the pte of a page under migration. We need to
* get to the page and wait until migration is finished.
* When we return from this function the fault will be retried.
*/
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
swp_entry_t entry;
struct page *page;
pte = *ptep;
if (!is_swap_pte(pte))
goto out;
entry = pte_to_swp_entry(pte);
if (!is_migration_entry(entry))
goto out;
page = migration_entry_to_page(entry);
/*
* Once radix-tree replacement of page migration started, page_count
* *must* be zero. And, we don't want to call wait_on_page_locked()
* against a page without get_page().
* So, we use get_page_unless_zero(), here. Even failed, page fault
* will occur again.
*/
if (!get_page_unless_zero(page))
goto out;
pte_unmap_unlock(ptep, ptl);
wait_on_page_locked(page);
put_page(page);
return;
out:
pte_unmap_unlock(ptep, ptl);
}
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address)
{
spinlock_t *ptl = pte_lockptr(mm, pmd);
pte_t *ptep = pte_offset_map(pmd, address);
__migration_entry_wait(mm, ptep, ptl);
}
void migration_entry_wait_huge(struct vm_area_struct *vma,
struct mm_struct *mm, pte_t *pte)
spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
__migration_entry_wait(mm, pte, ptl);
}
Mel Gorman
committed
#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
enum migrate_mode mode)
Mel Gorman
committed
{
struct buffer_head *bh = head;
/* Simple case, sync compaction */
if (mode != MIGRATE_ASYNC) {
Mel Gorman
committed
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
do {
get_bh(bh);
lock_buffer(bh);
bh = bh->b_this_page;
} while (bh != head);
return true;
}
/* async case, we cannot block on lock_buffer so use trylock_buffer */
do {
get_bh(bh);
if (!trylock_buffer(bh)) {
/*
* We failed to lock the buffer and cannot stall in
* async migration. Release the taken locks
*/
struct buffer_head *failed_bh = bh;
put_bh(failed_bh);
bh = head;
while (bh != failed_bh) {
unlock_buffer(bh);
put_bh(bh);
bh = bh->b_this_page;
}
return false;
}
bh = bh->b_this_page;
} while (bh != head);
return true;
}
#else
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
enum migrate_mode mode)
Mel Gorman
committed
{
return true;
}
#endif /* CONFIG_BLOCK */
Christoph Lameter
committed
* Replace the page in the mapping.
*
* The number of remaining references must be:
* 1 for anonymous pages without a mapping
* 2 for pages with a mapping
* 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
int migrate_page_move_mapping(struct address_space *mapping,
Mel Gorman
committed
struct page *newpage, struct page *page,
struct buffer_head *head, enum migrate_mode mode,
int extra_count)
struct zone *oldzone, *newzone;
int dirty;
int expected_count = 1 + extra_count;
/* Anonymous page without mapping */
if (page_count(page) != expected_count)
/* No turning back from here */
newpage->index = page->index;
newpage->mapping = page->mapping;
if (PageSwapBacked(page))
__SetPageSwapBacked(newpage);
return MIGRATEPAGE_SUCCESS;
oldzone = page_zone(page);
newzone = page_zone(newpage);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
expected_count += 1 + page_has_private(page);
Mel Gorman
committed
radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
if (!page_ref_freeze(page, expected_count)) {
Mel Gorman
committed
/*
* In the async migration case of moving a page with buffers, lock the
* buffers using trylock before the mapping is moved. If the mapping
* was moved, we later failed to lock the buffers and could not move
* the mapping back due to an elevated page count, we would have to
* block waiting on other references to be dropped.
*/
if (mode == MIGRATE_ASYNC && head &&
!buffer_migrate_lock_buffers(head, mode)) {
page_ref_unfreeze(page, expected_count);
Mel Gorman
committed
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
* Now we know that no one else is looking at the page:
* no turning back from here.
newpage->index = page->index;
newpage->mapping = page->mapping;
get_page(newpage); /* add cache reference */
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
/* Move dirty while page refs frozen and newpage not yet exposed */
dirty = PageDirty(page);
if (dirty) {
ClearPageDirty(page);
SetPageDirty(newpage);
}
radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
* Drop cache reference from old page by unfreezing
* to one less reference.
* We know this isn't the last reference.
*/
page_ref_unfreeze(page, expected_count - 1);
spin_unlock(&mapping->tree_lock);
/* Leave irq disabled to prevent preemption while updating stats */
/*
* If moved to a different zone then also account
* the page for that zone. Other VM counters will be
* taken care of when we establish references to the
* new page and drop references to the old page.
*
* Note that anonymous pages are accounted for
* via NR_FILE_PAGES and NR_ANON_MAPPED if they
* are mapped to swap space.
*/
if (newzone != oldzone) {
__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
if (PageSwapBacked(page) && !PageSwapCache(page)) {
__dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
__inc_node_state(newzone->zone_pgdat, NR_SHMEM);
}
if (dirty && mapping_cap_account_dirty(mapping)) {
__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
local_irq_enable();
return MIGRATEPAGE_SUCCESS;
EXPORT_SYMBOL(migrate_page_move_mapping);
/*
* The expected number of remaining references is the same as that
* of migrate_page_move_mapping().
*/
int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page)
{
int expected_count;
void **pslot;
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
expected_count = 2 + page_has_private(page);
if (page_count(page) != expected_count ||
Mel Gorman
committed
radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
if (!page_ref_freeze(page, expected_count)) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
newpage->index = page->index;
newpage->mapping = page->mapping;
radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
page_ref_unfreeze(page, expected_count - 1);
spin_unlock_irq(&mapping->tree_lock);
return MIGRATEPAGE_SUCCESS;
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
/*
* Gigantic pages are so large that we do not guarantee that page++ pointer
* arithmetic will work across the entire page. We need something more
* specialized.
*/
static void __copy_gigantic_page(struct page *dst, struct page *src,
int nr_pages)
{
int i;
struct page *dst_base = dst;
struct page *src_base = src;
for (i = 0; i < nr_pages; ) {
cond_resched();
copy_highpage(dst, src);
i++;
dst = mem_map_next(dst, dst_base, i);
src = mem_map_next(src, src_base, i);
}
}
static void copy_huge_page(struct page *dst, struct page *src)
{
int i;
int nr_pages;
if (PageHuge(src)) {
/* hugetlbfs page */
struct hstate *h = page_hstate(src);
nr_pages = pages_per_huge_page(h);
if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
__copy_gigantic_page(dst, src, nr_pages);
return;
}
} else {
/* thp page */
BUG_ON(!PageTransHuge(src));
nr_pages = hpage_nr_pages(src);
}
for (i = 0; i < nr_pages; i++) {
cond_resched();
copy_highpage(dst + i, src + i);
}
}
/*
* Copy the page to its new location
*/
void migrate_page_copy(struct page *newpage, struct page *page)
if (PageHuge(page) || PageTransHuge(page))
copy_huge_page(newpage, page);
else
copy_highpage(newpage, page);
if (PageError(page))
SetPageError(newpage);
if (PageReferenced(page))
SetPageReferenced(newpage);
if (PageUptodate(page))
SetPageUptodate(newpage);
VM_BUG_ON_PAGE(PageUnevictable(page), page);
} else if (TestClearPageUnevictable(page))
SetPageUnevictable(newpage);
if (PageChecked(page))
SetPageChecked(newpage);
if (PageMappedToDisk(page))
SetPageMappedToDisk(newpage);
/* Move dirty on pages not done by migrate_page_move_mapping() */
if (PageDirty(page))
SetPageDirty(newpage);
if (page_is_young(page))
set_page_young(newpage);
if (page_is_idle(page))
set_page_idle(newpage);
/*
* Copy NUMA information to the new page, to prevent over-eager
* future migrations of this same page.
*/
cpupid = page_cpupid_xchg_last(page, -1);
page_cpupid_xchg_last(newpage, cpupid);
/*
* Please do not reorder this without considering how mm/ksm.c's
* get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
*/
if (PageSwapCache(page))
ClearPageSwapCache(page);
ClearPagePrivate(page);
set_page_private(page, 0);
/*
* If any waiters have accumulated on the new page then
* wake them up.
*/
if (PageWriteback(newpage))
end_page_writeback(newpage);
copy_page_owner(page, newpage);
mem_cgroup_migrate(page, newpage);
EXPORT_SYMBOL(migrate_page_copy);
/************************************************************
* Migration functions
***********************************************************/
* Common logic to directly migrate a single LRU page suitable for
* pages that do not use PagePrivate/PagePrivate2.
*
* Pages are locked upon entry and exit.
*/
int migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode)
{
int rc;
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
migrate_page_copy(newpage, page);
return MIGRATEPAGE_SUCCESS;
#ifdef CONFIG_BLOCK
/*
* Migration function for pages with buffers. This function can only be used
* if the underlying filesystem guarantees that no other references to "page"
* exist.
*/
int buffer_migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page, enum migrate_mode mode)
{
struct buffer_head *bh, *head;
int rc;
if (!page_has_buffers(page))
return migrate_page(mapping, newpage, page, mode);
head = page_buffers(page);
rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
Mel Gorman
committed
/*
* In the async case, migrate_page_move_mapping locked the buffers
* with an IRQ-safe spinlock held. In the sync case, the buffers
* need to be locked now
*/
if (mode != MIGRATE_ASYNC)
BUG_ON(!buffer_migrate_lock_buffers(head, mode));
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
ClearPagePrivate(page);
set_page_private(newpage, page_private(page));
set_page_private(page, 0);
put_page(page);
get_page(newpage);
bh = head;
do {
set_bh_page(bh, newpage, bh_offset(bh));
bh = bh->b_this_page;
} while (bh != head);
SetPagePrivate(newpage);
migrate_page_copy(newpage, page);
bh = head;
do {
unlock_buffer(bh);
put_bh(bh);
bh = bh->b_this_page;
} while (bh != head);
return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(buffer_migrate_page);
#endif
/*
* Writeback a page to clean the dirty state
*/
static int writeout(struct address_space *mapping, struct page *page)
Christoph Lameter
committed
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = 1,
.range_start = 0,
.range_end = LLONG_MAX,
.for_reclaim = 1
};
int rc;
if (!mapping->a_ops->writepage)
/* No write method for the address space */
return -EINVAL;
if (!clear_page_dirty_for_io(page))
/* Someone else already triggered a write */
return -EAGAIN;
Christoph Lameter
committed
/*
* A dirty page may imply that the underlying filesystem has
* the page on some queue. So the page must be clean for
* migration. Writeout may mean we loose the lock and the
* page state is no longer what we checked for earlier.
* At this point we know that the migration attempt cannot
* be successful.
Christoph Lameter
committed
*/
remove_migration_ptes(page, page, false);
Christoph Lameter
committed
rc = mapping->a_ops->writepage(page, &wbc);
Christoph Lameter
committed
if (rc != AOP_WRITEPAGE_ACTIVATE)
/* unlocked. Relock */
lock_page(page);
}
/*
* Default handling if a filesystem does not provide a migration function.
*/
static int fallback_migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page, enum migrate_mode mode)
Mel Gorman
committed
if (PageDirty(page)) {
/* Only writeback pages in full synchronous migration */
if (mode != MIGRATE_SYNC)
Mel Gorman
committed
return -EBUSY;
return writeout(mapping, page);
Mel Gorman
committed
}
Christoph Lameter
committed
/*
* Buffers may be managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
if (page_has_private(page) &&
Christoph Lameter
committed
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
return migrate_page(mapping, newpage, page, mode);
Christoph Lameter
committed
}
/*
* Move a page to a newly allocated page
* The page is locked and all ptes have been successfully removed.
*
* The new page will have replaced the old page if this function
* is successful.
*
* Return value:
* < 0 - error code
* MIGRATEPAGE_SUCCESS - success
static int move_to_new_page(struct page *newpage, struct page *page,
enum migrate_mode mode)
{
struct address_space *mapping;
int rc = -EAGAIN;
bool is_lru = !__PageMovable(page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
mapping = page_mapping(page);
if (likely(is_lru)) {
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
else if (mapping->a_ops->migratepage)
/*
* Most pages have a mapping and most filesystems
* provide a migratepage callback. Anonymous pages
* are part of swap space which also has its own
* migratepage callback. This is the most common path
* for page migration.
*/
rc = mapping->a_ops->migratepage(mapping, newpage,
page, mode);
else
rc = fallback_migrate_page(mapping, newpage,
page, mode);
} else {
* In case of non-lru page, it could be released after
* isolation step. In that case, we shouldn't try migration.
VM_BUG_ON_PAGE(!PageIsolated(page), page);
if (!PageMovable(page)) {
rc = MIGRATEPAGE_SUCCESS;
__ClearPageIsolated(page);
goto out;
}
rc = mapping->a_ops->migratepage(mapping, newpage,
page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
!PageIsolated(page));
}
/*
* When successful, old pagecache page->mapping must be cleared before
* page is freed; but stats require that PageAnon be left as PageAnon.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
if (__PageMovable(page)) {
VM_BUG_ON_PAGE(!PageIsolated(page), page);
/*
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
__ClearPageIsolated(page);
}
/*
* Anonymous and movable page->mapping will be cleard by
* free_pages_prepare so don't reset it here for keeping
* the type to work PageAnon, for example.
*/
if (!PageMappingFlags(page))
page->mapping = NULL;
return rc;
}
static int __unmap_and_move(struct page *page, struct page *newpage,
int force, enum migrate_mode mode)
int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(page);
if (!force || mode == MIGRATE_ASYNC)
/*
* It's not safe for direct compaction to call lock_page.
* For example, during page readahead pages are added locked
* to the LRU. Later, when the IO completes the pages are
* marked uptodate and unlocked. However, the queueing
* could be merging multiple pages for one bio (e.g.
* mpage_readpages). If an allocation happens for the
* second or third page, the process can end up locking
* the same page twice and deadlocking. Rather than
* trying to be clever about what pages can be locked,
* avoid the use of lock_page for direct compaction
* altogether.
*/
if (current->flags & PF_MEMALLOC)
lock_page(page);
}
if (PageWriteback(page)) {
Andrea Arcangeli
committed
/*
* Only in the case of a full synchronous migration is it
* necessary to wait for PageWriteback. In the async case,
* the retry loop is too short and in the sync-light case,
* the overhead of stalling is too much
Andrea Arcangeli
committed
*/
if (mode != MIGRATE_SYNC) {
Andrea Arcangeli
committed
rc = -EBUSY;
Andrea Arcangeli
committed
}
if (!force)
wait_on_page_writeback(page);
}
* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
* we cannot notice that anon_vma is freed while we migrates a page.
* This get_anon_vma() delays freeing anon_vma pointer until the end
* of migration. File cache pages are no problem because of page_lock()
* File Caches may use write_page() or lock_page() in migration, then,
* just care Anon page here.
*
* Only page_get_anon_vma() understands the subtleties of
* getting a hold on an anon_vma from outside one of its mms.
* But if we cannot get anon_vma, then we won't need it anyway,
* because that implies that the anon page is no longer mapped
* (and cannot be remapped so long as we hold the page lock).
if (PageAnon(page) && !PageKsm(page))
anon_vma = page_get_anon_vma(page);
/*
* Block others from accessing the new page when we get around to
* establishing additional references. We are usually the only one
* holding a reference to newpage at this point. We used to have a BUG
* here if trylock_page(newpage) fails, but would like to allow for
* cases where there might be a race with the previous use of newpage.
* This is much like races on refcount of oldpage: just don't BUG().
*/
if (unlikely(!trylock_page(newpage)))
goto out_unlock;
if (unlikely(!is_lru)) {
rc = move_to_new_page(newpage, page, mode);
goto out_unlock_both;
}
* Corner case handling:
* 1. When a new swap-cache page is read into, it is added to the LRU
* and treated as swapcache but it has no rmap yet.
* Calling try_to_unmap() against a page->mapping==NULL page will
* trigger a BUG. So handle it here.
* 2. An orphaned page (see truncate_complete_page) might have
* fs-private metadata. The page can be picked up due to memory
* offlining. Everywhere else except page reclaim, the page is
* invisible to the vm, so the page can not be migrated. So try to
* free the metadata, so the page can be freed.
VM_BUG_ON_PAGE(PageAnon(page), page);