/*
 * Drain pcplists of the indicated processor and zone.
*
* The processor must either be the current processor and the
* thread pinned to the current processor or a processor that
* is not online.
*/
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
local_irq_save(flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
if (pcp->count) {
free_pcppages_bulk(zone, pcp->count, pcp);
pcp->count = 0;
}
local_irq_restore(flags);
}
/*
* Drain pcplists of all zones on the indicated processor.
*
* The processor must either be the current processor and the
* thread pinned to the current processor or a processor that
* is not online.
*/
static void drain_pages(unsigned int cpu)
{
struct zone *zone;
for_each_populated_zone(zone) {
drain_pages_zone(cpu, zone);
}
}

/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
*
* The CPU has to be pinned. When zone parameter is non-NULL, spill just
* the single zone's pages.
 */
void drain_local_pages(struct zone *zone)
{
int cpu = smp_processor_id();
if (zone)
drain_pages_zone(cpu, zone);
else
drain_pages(cpu);
}

/*
 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
*
* When zone parameter is non-NULL, spill just the single zone's pages.
*
* Note that this code is protected against sending an IPI to an offline
* CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
* on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
* nothing keeps CPUs from showing up after we populated the cpumask and
* before the call to on_each_cpu_mask().
 */
void drain_all_pages(struct zone *zone)
{
int cpu;
/*
* Allocate in the BSS so we won't require allocation in
* direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
*/
static cpumask_t cpus_with_pcps;
/*
* We don't care about racing with CPU hotplug event
* as offline notification will cause the notified
* cpu to drain that CPU pcps and on_each_cpu_mask
* disables preemption as part of its processing
*/
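/*
 * Only send the drain IPI to CPUs that actually have pages sitting on their
 * pcplists; the cpumask below is recomputed on every call for that purpose.
 */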
for_each_online_cpu(cpu) {
struct per_cpu_pageset *pcp;
struct zone *z;
bool has_pcps = false;

if (zone) {
pcp = per_cpu_ptr(zone->pageset, cpu);
if (pcp->pcp.count)
has_pcps = true;
} else {
for_each_populated_zone(z) {
pcp = per_cpu_ptr(z->pageset, cpu);
if (pcp->pcp.count) {
has_pcps = true;
break;
}
}
}
if (has_pcps)
cpumask_set_cpu(cpu, &cpus_with_pcps);
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
zone, 1);
}
#ifdef CONFIG_HIBERNATION

void mark_free_pages(struct zone *zone)
{
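/*
 * Hibernation helper: clear the swsusp "free" bit on every valid page in
 * the zone, then set it again only for pages currently on the buddy free
 * lists, so the snapshot code knows which page frames hold no data.
 */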
unsigned long pfn, max_zone_pfn;
unsigned long flags;
unsigned int order, t;
struct page *page;
if (zone_is_empty(zone))
return;
spin_lock_irqsave(&zone->lock, flags);
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (!swsusp_page_is_forbidden(page))
swsusp_unset_page_free(page);
}

for_each_migratetype_order(order, t) {
list_for_each_entry(page,
&zone->free_area[order].free_list[t], lru) {
unsigned long i;

pfn = page_to_pfn(page);
for (i = 0; i < (1UL << order); i++)
swsusp_set_page_free(pfn_to_page(pfn + i));
}
}
spin_unlock_irqrestore(&zone->lock, flags);
}

#endif /* CONFIG_PM */
/*
 * Free a 0-order page
 * cold == true ? free a cold page : free a hot page
 */
void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
local_irq_save(flags);
__count_vm_event(PGFREE);
/*
* We only track unmovable, reclaimable and movable on pcp lists.
* Free ISOLATE pages back to the allocator because they are being
* offlined but treat RESERVE as movable pages so we can get those
* areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
free_one_page(zone, page, pfn, 0, migratetype);
goto out;
}
migratetype = MIGRATE_MOVABLE;
}
pcp = &this_cpu_ptr(zone->pageset)->pcp;
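/*
 * Hot pages go to the head of the pcp list and cold pages to the tail, so
 * the next allocation from this list hands back the cache-hottest page.
 */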
if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
else
list_add_tail(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
free_pcppages_bulk(zone, batch, pcp);
pcp->count -= batch;
}

out:
local_irq_restore(flags);
}
/*
* Free a list of 0-order pages
*/
void free_hot_cold_page_list(struct list_head *list, bool cold)
{
struct page *page, *next;
list_for_each_entry_safe(page, next, list, lru) {
trace_mm_page_free_batched(page, cold);
free_hot_cold_page(page, cold);
}
}
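/*
 * Typical caller pattern (illustrative sketch, not code from this file):
 * gather 0-order pages on a local LIST_HEAD with list_add(&page->lru, &l),
 * then release the whole batch with free_hot_cold_page_list(&l, false).
 */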
/*
* split_page takes a non-compound higher-order page, and splits it into
* n (1<<order) sub-pages: page[0..n]
* Each sub-page must be freed individually.
*
* Note: this is probably too low level an operation for use in drivers.
* Please consult with lkml before using this in your driver.
*/
void split_page(struct page *page, unsigned int order)
{
int i;
gfp_t gfp_mask;
VM_BUG_ON_PAGE(PageCompound(page), page);
VM_BUG_ON_PAGE(!page_count(page), page);
#ifdef CONFIG_KMEMCHECK
/*
* Split shadow pages too, because free(page[0]) would
* otherwise free the whole shadow.
*/
if (kmemcheck_page_is_tracked(page))
split_page(virt_to_page(page[0].shadow), order);
#endif
gfp_mask = get_page_owner_gfp(page);
set_page_owner(page, 0, gfp_mask);
for (i = 1; i < (1 << order); i++) {
set_page_refcounted(page + i);
set_page_owner(page + i, 0, gfp_mask);
}
}
EXPORT_SYMBOL_GPL(split_page);
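/*
 * Usage sketch for split_page() (illustration only, not code from this file):
 * after page = alloc_pages(GFP_KERNEL, 2) and split_page(page, 2), each of
 * the four order-0 pages page, page + 1, page + 2, page + 3 carries its own
 * reference and must be released individually with __free_page().
 */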
int __isolate_free_page(struct page *page, unsigned int order)
{
unsigned long watermark;
struct zone *zone;
int mt;
BUG_ON(!PageBuddy(page));
zone = page_zone(page);
mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
/* Obey watermarks as if the page was being allocated */
watermark = low_wmark_pages(zone) + (1 << order);
if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
}
/* Remove page from free list */
list_del(&page->lru);
zone->free_area[order].nr_free--;
rmv_page_order(page);
set_page_owner(page, order, __GFP_MOVABLE);
/* Set the pageblock if the isolated page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
int mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
set_pageblock_migratetype(page,
MIGRATE_MOVABLE);
}
}

return 1UL << order;
}
/*
* Similar to split_page except the page is already free. As this is only
* being used for migration, the migratetype of the block also changes.
* As this is called with interrupts disabled, the caller is responsible
* for calling arch_alloc_page() and kernel_map_pages() after interrupts
* are enabled.
*
* Note: this is probably too low level an operation for use in drivers.
* Please consult with lkml before using this in your driver.
*/
int split_free_page(struct page *page)
{
unsigned int order;
int nr_pages;
order = page_order(page);
nr_pages = __isolate_free_page(page, order);
if (!nr_pages)
return 0;
/* Split into individual pages */
set_page_refcounted(page);
split_page(page, order);
return nr_pages;
}

/*
 * Allocate a page from the given zone. Use pcplists for order-0 allocations.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
gfp_t gfp_flags, int alloc_flags, int migratetype)
{
unsigned long flags;
struct page *page;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
if (likely(order == 0)) {
struct per_cpu_pages *pcp;
struct list_head *list;

local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
migratetype, cold);
if (unlikely(list_empty(list)))
goto failed;
}
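/* Cold requests take from the tail of the list, hot ones from the head. */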
if (cold)
page = list_last_entry(list, struct page, lru);
else
page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count--;
} else {
if (unlikely(gfp_flags & __GFP_NOFAIL)) {
/*
* __GFP_NOFAIL is not to be used in new code.
*
* All __GFP_NOFAIL callers should be fixed so that they
* properly detect and handle allocation failures.
*
* We most definitely don't want callers attempting to
* allocate greater than order-1 page units with
* __GFP_NOFAIL.
*/
WARN_ON_ONCE(order > 1);
}
spin_lock_irqsave(&zone->lock, flags);

page = NULL;
if (alloc_flags & ALLOC_HARDER) {
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (page)
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
if (!page)
page = __rmqueue(zone, order, migratetype);
spin_unlock(&zone->lock);
if (!page)
goto failed;
__mod_zone_freepage_state(zone, -(1 << order),
get_pcppage_migratetype(page));
}

__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
!test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);

VM_BUG_ON_PAGE(bad_range(zone, page), page);
return page;

failed:
local_irq_restore(flags);
return NULL;
}

#ifdef CONFIG_FAIL_PAGE_ALLOC
static struct {
struct fault_attr attr;

bool ignore_gfp_highmem;
bool ignore_gfp_reclaim;
u32 min_order;
} fail_page_alloc = {
.attr = FAULT_ATTR_INITIALIZER,
.ignore_gfp_reclaim = true,
.ignore_gfp_highmem = true,
.min_order = 1,
};
static int __init setup_fail_page_alloc(char *str)
{
return setup_fault_attr(&fail_page_alloc.attr, str);
}
__setup("fail_page_alloc=", setup_fail_page_alloc);
static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
return false;
if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
return false;
if (fail_page_alloc.ignore_gfp_reclaim &&
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;

return should_fail(&fail_page_alloc.attr, 1 << order);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
static int __init fail_page_alloc_debugfs(void)
{
umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
&fail_page_alloc.attr);
if (IS_ERR(dir))
return PTR_ERR(dir);
if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
&fail_page_alloc.ignore_gfp_reclaim))
goto fail;
if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
&fail_page_alloc.ignore_gfp_highmem))
goto fail;
if (!debugfs_create_u32("min-order", mode, dir,
&fail_page_alloc.min_order))
goto fail;
return 0;
fail:
debugfs_remove_recursive(dir);

return -ENOMEM;
}
late_initcall(fail_page_alloc_debugfs);
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
#else /* CONFIG_FAIL_PAGE_ALLOC */
static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
return false;
}
#endif /* CONFIG_FAIL_PAGE_ALLOC */
/*
 * Return true if free base pages are above 'mark'. For high-order checks it
 * will return true if the order-0 watermark is reached and there is at least
 * one free page of a suitable size. Checking now avoids taking the zone lock
 * to check in the allocation paths if no pages are free.
 */
static bool __zone_watermark_ok(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx, int alloc_flags,
long free_pages)
{
long min = mark;
int o;
const int alloc_harder = (alloc_flags & ALLOC_HARDER);
/* free_pages may go negative - that's OK */
free_pages -= (1 << order) - 1;

if (alloc_flags & ALLOC_HIGH)
min -= min / 2;

/*
* If the caller does not have rights to ALLOC_HARDER then subtract
* the high-atomic reserves. This will over-estimate the size of the
* atomic reserve but it avoids a search.
*/
if (likely(!alloc_harder))
free_pages -= z->nr_reserved_highatomic;
else
min -= min / 4;

#ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */
if (!(alloc_flags & ALLOC_CMA))
free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
#endif

/*
* Check watermarks for an order-0 allocation request. If these
* are not met, then a high-order request also cannot go ahead
* even if a suitable page happened to be free.
*/
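/*
 * Example (no ALLOC_HIGH/ALLOC_HARDER adjustments): with min = 128 pages and
 * lowmem_reserve[classzone_idx] = 256, the order-0 check only passes while
 * more than 384 pages remain free.
 */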
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return false;
/* If this is an order-0 request then the watermark is fine */
if (!order)
return true;
/* For a high-order request, check at least one suitable page is free */
for (o = order; o < MAX_ORDER; o++) {
struct free_area *area = &z->free_area[o];
int mt;
if (!area->nr_free)
continue;
if (alloc_harder)
return true;
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
if (!list_empty(&area->free_list[mt]))
return true;
}
#ifdef CONFIG_CMA
if ((alloc_flags & ALLOC_CMA) &&
!list_empty(&area->free_list[MIGRATE_CMA])) {
return true;
}
#endif
}
return false;
}
bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
zone_page_state(z, NR_FREE_PAGES));
}
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
unsigned long mark, int classzone_idx)
{
long free_pages = zone_page_state(z, NR_FREE_PAGES);
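/*
 * Near the watermark the per-cpu vmstat deltas can hide pages, so fall back
 * to the slower zone_page_state_snapshot() for an exact NR_FREE_PAGES count.
 */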
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
return __zone_watermark_ok(z, order, mark, classzone_idx, 0,
free_pages);
}

#ifdef CONFIG_NUMA
static bool zone_local(struct zone *local_zone, struct zone *zone)
{
return local_zone->node == zone->node;
}

static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
RECLAIM_DISTANCE;
}
#else /* CONFIG_NUMA */
static bool zone_local(struct zone *local_zone, struct zone *zone)
{
return true;
}
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return true;
}
#endif /* CONFIG_NUMA */
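/*
 * Refill NR_ALLOC_BATCH to (high watermark - low watermark) for the fair
 * zone allocation policy, walking the node's zones up to and including the
 * preferred zone.
 */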
static void reset_alloc_batches(struct zone *preferred_zone)
{
struct zone *zone = preferred_zone->zone_pgdat->node_zones;
do {
mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
} while (zone++ != preferred_zone);
}
/*
 * get_page_from_freelist goes through the zonelist trying to allocate
* a page.
*/
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
const struct alloc_context *ac)
{
struct zonelist *zonelist = ac->zonelist;
struct zoneref *z;
struct zone *zone;
int nr_fair_skipped = 0;
struct page *page = NULL;
bool zonelist_rescan;

zonelist_scan:
zonelist_rescan = false;
/*
 * Scan zonelist, looking for a zone with enough free.
 * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
 */
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
ac->nodemask) {
unsigned long mark;
if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* Distribute pages in proportion to the individual
* zone size to ensure fair page aging. The zone a
* page was allocated in should have no effect on the
* time the page has in memory before being reclaimed.
*/
if (alloc_flags & ALLOC_FAIR) {
if (!zone_local(ac->preferred_zone, zone))
break;
if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
nr_fair_skipped++;
continue;
}
}
/*
* When allocating a page cache page for writing, we
* want to get it from a zone that is within its dirty
* limit, such that no single zone holds more than its
* proportional share of globally allowed dirty pages.
* The dirty limits take into account the zone's
* lowmem reserves and high watermark so that kswapd
* should be able to balance it without having to
* write pages from its LRU list.
*
* This may look like it could increase pressure on
* lower zones by failing allocations in higher zones
* before they are full. But the pages that do spill
* over are limited as the lower zones are protected
* by this very same mechanism. It should not become
* a practical burden to them.
*
* XXX: For now, allow allocations to potentially
* exceed the per-zone dirty limit in the slowpath
* (spread_dirty_pages unset) before going into reclaim,
* which is important when on a NUMA setup the allowed
* zones are together not big enough to reach the
* global limit. The proper fix for these situations
* will require awareness of zones in the
* dirty-throttling and the flusher threads.
*/
if (ac->spread_dirty_pages && !zone_dirty_ok(zone))
continue;
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (!zone_watermark_ok(zone, order, mark,
ac->classzone_idx, alloc_flags)) {
int ret;
/* Checked here to keep the fast path fast */
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
if (zone_reclaim_mode == 0 ||
!zone_allows_reclaim(ac->preferred_zone, zone))
continue;
ret = zone_reclaim(zone, gfp_mask, order);
switch (ret) {
case ZONE_RECLAIM_NOSCAN:
/* did not scan */
continue;
case ZONE_RECLAIM_FULL:
/* scanned but unreclaimable */
continue;
default:
/* did we reclaim enough */
if (zone_watermark_ok(zone, order, mark,
ac->classzone_idx, alloc_flags))
goto try_this_zone;
continue;
}
}

try_this_zone:
page = buffered_rmqueue(ac->preferred_zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
if (prep_new_page(page, order, gfp_mask, alloc_flags))
goto try_this_zone;
/*
* If this is a high-order atomic allocation then check
* if the pageblock should be reserved for the future
*/
if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
reserve_highatomic_pageblock(page, zone, order);

return page;
}
}

/*
* The first pass makes sure allocations are spread fairly within the
* local node. However, the local node might have free pages left
* after the fairness batches are exhausted, and remote zones haven't
* even been considered yet. Try once more without fairness, and
* include remote zones now, before entering the slowpath and waking
* kswapd: prefer spilling to a remote zone over swapping locally.
*/
if (alloc_flags & ALLOC_FAIR) {
alloc_flags &= ~ALLOC_FAIR;
if (nr_fair_skipped) {
zonelist_rescan = true;
reset_alloc_batches(ac->preferred_zone);
}
if (nr_online_nodes > 1)
zonelist_rescan = true;
}
if (zonelist_rescan)
goto zonelist_scan;
return NULL;
}
/*
* Large machines with many possible nodes should not always dump per-node
* meminfo in irq context.
*/
static inline bool should_suppress_show_mem(void)
{
bool ret = false;
#if NODES_SHIFT > 8
ret = in_interrupt();
#endif
return ret;
}
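/*
 * Rate-limit state consumed by warn_alloc_failed() below, so repeated
 * allocation failures do not flood the kernel log.
 */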
static DEFINE_RATELIMIT_STATE(nopage_rs,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
debug_guardpage_minorder() > 0)
return;
/*
* This documents exceptions given to allocations in certain
* contexts that are allowed to allocate outside current's set
* of allowed nodes.
*/
if (!(gfp_mask & __GFP_NOMEMALLOC))
if (test_thread_flag(TIF_MEMDIE) ||
(current->flags & (PF_MEMALLOC | PF_EXITING)))
filter &= ~SHOW_MEM_FILTER_NODES;
if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
filter &= ~SHOW_MEM_FILTER_NODES;
if (fmt) {
struct va_format vaf;
va_list args;

va_start(args, fmt);

vaf.fmt = fmt;
vaf.va = &args;

pr_warn("%pV", &vaf);

va_end(args);
}

pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
current->comm, order, gfp_mask, &gfp_mask);
dump_stack();
if (!should_suppress_show_mem())
show_mem(filter);
}
static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
{
struct oom_control oc = {
.zonelist = ac->zonelist,
.nodemask = ac->nodemask,
.gfp_mask = gfp_mask,
.order = order,
};
struct page *page;
*did_some_progress = 0;
/*
* Acquire the oom lock. If that fails, somebody else is
* making progress for us.
*/
if (!mutex_trylock(&oom_lock)) {
*did_some_progress = 1;
schedule_timeout_uninterruptible(1);
return NULL;
}

/*
* Go through the zonelist yet one more time, keep very high watermark
* here, this is only to catch a parallel oom killing, we must fail if
* we're still under heavy pressure.
*/
page = get_page_from_freelist(gfp_mask | __GFP_HARDWALL, order,
ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac);
if (page)
goto out;

if (!(gfp_mask & __GFP_NOFAIL)) {
/* Coredumps can quickly deplete all memory reserves */
if (current->flags & PF_DUMPCORE)
goto out;
/* The OOM killer will not help higher order allocs */
if (order > PAGE_ALLOC_COSTLY_ORDER)
goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */
if (ac->high_zoneidx < ZONE_NORMAL)
goto out;
/* The OOM killer does not compensate for IO-less reclaim */
if (!(gfp_mask & __GFP_FS)) {
/*
* XXX: Page reclaim didn't yield anything,
* and the OOM killer can't be invoked, but
* keep looping as per tradition.
*/
*did_some_progress = 1;
goto out;
}
if (pm_suspended_storage())
goto out;
/* The OOM killer may not free memory on a specific node */
if (gfp_mask & __GFP_THISNODE)
goto out;
}
/* Exhausted what can be done so it's blamo time */
if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
*did_some_progress = 1;
if (gfp_mask & __GFP_NOFAIL) {
page = get_page_from_freelist(gfp_mask, order,
ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac);
/*
* fallback to ignore cpuset restriction if our nodes
* are depleted
*/
if (!page)
page = get_page_from_freelist(gfp_mask, order,
ALLOC_NO_WATERMARKS, ac);
}
}
out:
mutex_unlock(&oom_lock);
return page;
}
#ifdef CONFIG_COMPACTION
/* Try memory compaction for high-order allocations before reclaim */
static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
bool *deferred_compaction)
{
unsigned long compact_result;
struct page *page;
if (!order)
return NULL;
current->flags |= PF_MEMALLOC;
compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
mode, contended_compaction);
current->flags &= ~PF_MEMALLOC;
switch (compact_result) {
case COMPACT_DEFERRED:
*deferred_compaction = true;
/* fall-through */
case COMPACT_SKIPPED:
return NULL;
default:
break;
}
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
* count a compaction stall
*/
count_vm_event(COMPACTSTALL);
page = get_page_from_freelist(gfp_mask, order,
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
if (page) {
struct zone *zone = page_zone(page);
zone->compact_blockskip_flush = false;
compaction_defer_reset(zone, order, true);
count_vm_event(COMPACTSUCCESS);
return page;
}
/*
* It's bad if compaction run occurs and fails. The most likely reason
* is that pages exist, but not enough to satisfy watermarks.
*/
count_vm_event(COMPACTFAIL);
cond_resched();
return NULL;
}
#else
static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended_compaction,
bool *deferred_compaction)
{
return NULL;
}
#endif /* CONFIG_COMPACTION */
/* Perform direct synchronous page reclaim */
static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac)
{
struct reclaim_state reclaim_state;
int progress;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
current->flags |= PF_MEMALLOC;
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
progress = try_to_free_pages(ac->zonelist, order, gfp_mask,
ac->nodemask);
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
current->flags &= ~PF_MEMALLOC;
cond_resched();
return progress;
}
/* The really slow allocator path where we enter direct reclaim */
static inline struct page *
__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
int alloc_flags, const struct alloc_context *ac,
unsigned long *did_some_progress)
{
struct page *page = NULL;
bool drained = false;
*did_some_progress = __perform_reclaim(gfp_mask, order, ac);
if (unlikely(!(*did_some_progress)))
return NULL;