Newer
Older
/*
* linux/mm/page_alloc.c
*
* Manages the free list, the system allocates free pages here.
* Note that kmalloc() lives in slab.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
* Swap reorganised 29.12.95, Stephen Tweedie
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
* Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
* Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
* Zone balancing, Kanoj Sarcar, SGI, Jan 2000
* Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
* (lots of bits borrowed from Ingo Molnar & Andrew Morton)
*/
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
#include <linux/mempolicy.h>
Yasunori Goto
committed
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <linux/pfn.h>
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
* Array of node states.
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
[N_POSSIBLE] = NODE_MASK_ALL,
[N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
[N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
[N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
[N_CPU] = { { [0] = 1UL } },
#endif /* NUMA */
};
EXPORT_SYMBOL(node_states);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
unsigned long highest_memmap_pfn __read_mostly;
int percpu_pagelist_fraction;
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
int pageblock_order __read_mostly;
#endif
static void __free_pages_ok(struct page *page, unsigned int order);
/*
* results with 256, 32 in the lowmem_reserve sysctl:
* 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
* 1G machine -> (16M dma, 784M normal, 224M high)
* NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
* HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
* HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
*
* TBD: should special case ZONE_DMA32 machines here - in those we normally
* don't need any ZONE_NORMAL reservation
Christoph Lameter
committed
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
#ifdef CONFIG_ZONE_DMA
Christoph Lameter
committed
256,
#ifdef CONFIG_ZONE_DMA32
Christoph Lameter
committed
256,
#ifdef CONFIG_HIGHMEM
Christoph Lameter
committed
};
static char * const zone_names[MAX_NR_ZONES] = {
#ifdef CONFIG_ZONE_DMA
Christoph Lameter
committed
"DMA",
#ifdef CONFIG_ZONE_DMA32
Christoph Lameter
committed
"DMA32",
Christoph Lameter
committed
"Normal",
#ifdef CONFIG_HIGHMEM
Christoph Lameter
committed
};
Yasunori Goto
committed
unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages;
static unsigned long __meminitdata dma_reserve;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/*
* MAX_ACTIVE_REGIONS determines the maximum number of distinct
* ranges of memory (RAM) that may be registered with add_active_range().
* Ranges passed to add_active_range() will be merged if possible
* so the number of times add_active_range() can be called is
* related to the number of nodes and the number of holes
*/
#ifdef CONFIG_MAX_ACTIVE_REGIONS
/* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */
#define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
#else
#if MAX_NUMNODES >= 32
/* If there can be many nodes, allow up to 50 holes per node */
#define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
#else
/* By default, allow up to 256 distinct regions */
#define MAX_ACTIVE_REGIONS 256
#endif
#endif
static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
static int __meminitdata nr_nodemap_entries;
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
static unsigned long __initdata required_kernelcore;
static unsigned long __initdata required_movablecore;
static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
EXPORT_SYMBOL(movable_zone);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
#if MAX_NUMNODES > 1
int nr_node_ids __read_mostly = MAX_NUMNODES;
Christoph Lameter
committed
int nr_online_nodes __read_mostly = 1;
Christoph Lameter
committed
EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly;
static void set_pageblock_migratetype(struct page *page, int migratetype)
{
if (unlikely(page_group_by_mobility_disabled))
migratetype = MIGRATE_UNMOVABLE;
set_pageblock_flags_group(page, (unsigned long)migratetype,
PB_migrate, PB_migrate_end);
}
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
int ret = 0;
unsigned seq;
unsigned long pfn = page_to_pfn(page);
do {
seq = zone_span_seqbegin(zone);
if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
ret = 1;
else if (pfn < zone->zone_start_pfn)
ret = 1;
} while (zone_span_seqretry(zone, seq));
return ret;
}
static int page_is_consistent(struct zone *zone, struct page *page)
{
if (!pfn_valid_within(page_to_pfn(page)))
return 0;
return 1;
}
/*
* Temporary debugging check for pages not lying within a given zone.
*/
static int bad_range(struct zone *zone, struct page *page)
{
if (page_outside_zone_boundaries(zone, page))
if (!page_is_consistent(zone, page))
return 1;
#else
static inline int bad_range(struct zone *zone, struct page *page)
{
return 0;
}
#endif
static unsigned long resume;
static unsigned long nr_shown;
static unsigned long nr_unshown;
/*
* Allow a burst of 60 reports, then keep quiet for that minute;
* or allow a steady drip of one report per second.
*/
if (nr_shown == 60) {
if (time_before(jiffies, resume)) {
nr_unshown++;
goto out;
}
if (nr_unshown) {
printk(KERN_ALERT
"BUG: Bad page state: %lu messages suppressed\n",
nr_unshown);
nr_unshown = 0;
}
nr_shown = 0;
}
if (nr_shown++ == 0)
resume = jiffies + 60 * HZ;
printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
current->comm, page_to_pfn(page));
"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
page, (void *)page->flags, page_count(page),
page_mapcount(page), page->mapping, page->index);
/* Leave bad fields for debug, except PageBuddy could make trouble */
__ClearPageBuddy(page);
add_taint(TAINT_BAD_PAGE);
}
/*
* Higher-order pages are called "compound pages". They are structured thusly:
*
* The first PAGE_SIZE page is called the "head page".
*
* The remaining PAGE_SIZE pages are called "tail pages".
*
* All pages have PG_compound set. All pages have their ->private pointing at
* the head page (even the head page has this).
*
* The first tail page's ->lru.next holds the address of the compound page's
* put_page() function. Its ->lru.prev holds the order of allocation.
* This usage means that zero-order pages may not be compound.
static void free_compound_page(struct page *page)
{
__free_pages_ok(page, compound_order(page));
void prep_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
set_compound_page_dtor(page, free_compound_page);
set_compound_order(page, order);
__SetPageHead(page);
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
__SetPageTail(p);
p->first_page = page;
}
}
static int destroy_compound_page(struct page *page, unsigned long order)
if (unlikely(compound_order(page) != order) ||
unlikely(!PageHead(page))) {
__ClearPageHead(page);
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
if (unlikely(!PageTail(p) || (p->first_page != page))) {
static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
{
int i;
/*
* clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
* and __GFP_HIGHMEM from hard or soft interrupt context.
*/
VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
for (i = 0; i < (1 << order); i++)
clear_highpage(page + i);
}
static inline void set_page_order(struct page *page, int order)
{
__SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
__ClearPageBuddy(page);
}
/*
* Locate the struct page for both the matching buddy in our
* pair (buddy1) and the combined O(n+1) page they form (page).
*
* 1) Any buddy B1 will have an order O twin B2 which satisfies
* the following equation:
* B2 = B1 ^ (1 << O)
* For example, if the starting buddy (buddy2) is #8 its order
* 1 buddy is #10:
* B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
*
* 2) Any buddy B will have an order O+1 parent P which
* satisfies the following equation:
* P = B & ~(1 << O)
*
* Assumption: *_mem_map is contiguous at least up to MAX_ORDER
*/
static inline struct page *
__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
{
unsigned long buddy_idx = page_idx ^ (1 << order);
return page + (buddy_idx - page_idx);
}
static inline unsigned long
__find_combined_index(unsigned long page_idx, unsigned int order)
{
return (page_idx & ~(1 << order));
}
/*
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (b) the buddy is in the buddy system &&
* (c) a page and its buddy have the same order &&
* (d) a page and its buddy are in the same zone.
*
* For recording whether a page is in the buddy system, we use PG_buddy.
* Setting, clearing, and testing PG_buddy is serialized by zone->lock.
* For recording page's order, we use page_private(page).
static inline int page_is_buddy(struct page *page, struct page *buddy,
int order)
if (!pfn_valid_within(page_to_pfn(buddy)))
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
if (PageBuddy(buddy) && page_order(buddy) == order) {
Nick Piggin
committed
VM_BUG_ON(page_count(buddy) != 0);
}
}
/*
* Freeing function for a buddy system allocator.
*
* The concept of a buddy system is to maintain direct-mapped table
* (containing bit values) for memory blocks of various "orders".
* The bottom level table contains the map for the smallest allocatable
* units of memory (here, pages), and each level above it describes
* pairs of units from the levels below, hence, "buddies".
* At a high level, all that happens here is marking the table entry
* at the bottom level available, and propagating the changes upward
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
* free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
* free, the remainder of the region must be split into blocks.
* If a block is freed, and its buddy is also free, then this
* triggers coalescing into a block of larger size.
*
* -- wli
*/
static inline void __free_one_page(struct page *page,
struct zone *zone, unsigned int order,
int migratetype)
if (unlikely(destroy_compound_page(page, order)))
return;
VM_BUG_ON(migratetype == -1);
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
VM_BUG_ON(page_idx & ((1 << order) - 1));
while (order < MAX_ORDER-1) {
unsigned long combined_idx;
struct page *buddy;
buddy = __page_find_buddy(page, page_idx, order);
if (!page_is_buddy(page, buddy, order))
/* Our buddy is free, merge with it and move up one order. */
zone->free_area[order].nr_free--;
combined_idx = __find_combined_index(page_idx, order);
page = page + (combined_idx - page_idx);
page_idx = combined_idx;
order++;
}
set_page_order(page, order);
list_add(&page->lru,
&zone->free_area[order].free_list[migratetype]);
#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
/*
* free_page_mlock() -- clean up attempts to free and mlocked() page.
* Page should not be on lru, so no need to fix that up.
* free_pages_check() will verify...
*/
static inline void free_page_mlock(struct page *page)
{
__ClearPageMlocked(page);
__dec_zone_page_state(page, NR_MLOCK);
__count_vm_event(UNEVICTABLE_MLOCKFREED);
}
#else
static void free_page_mlock(struct page *page) { }
#endif
static inline int free_pages_check(struct page *page)
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
Nick Piggin
committed
(atomic_read(&page->_count) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
return 0;
}
/*
* Frees a list of pages.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
* If the zone was previously in an "all pages pinned" state then look to
* see if this freeing clears that state.
*
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
static void free_pages_bulk(struct zone *zone, int count,
struct list_head *list, int order)
zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
__mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
/* have to delete it as __free_one_page list manipulates */
__free_one_page(page, zone, order, page_private(page));
static void free_one_page(struct zone *zone, struct page *page, int order,
int migratetype)
zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
__free_one_page(page, zone, order, migratetype);
}
static void __free_pages_ok(struct page *page, unsigned int order)
{
unsigned long flags;
int clearMlocked = PageMlocked(page);
bad += free_pages_check(page + i);
if (bad)
debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
debug_check_no_obj_freed(page_address(page),
PAGE_SIZE << order);
}
if (unlikely(clearMlocked))
free_page_mlock(page);
__count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order,
get_pageblock_migratetype(page));
/*
* permit the bootmem allocator to evade page validation on high-order frees
*/
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
if (order == 0) {
__ClearPageReserved(page);
set_page_count(page, 0);
} else {
int loop;
for (loop = 0; loop < BITS_PER_LONG; loop++) {
struct page *p = &page[loop];
if (loop + 1 < BITS_PER_LONG)
prefetchw(p + 1);
__ClearPageReserved(p);
set_page_count(p, 0);
}
}
}
/*
* The order of subdivision here is critical for the IO subsystem.
* Please do not alter this order without good reasons and regression
* testing. Specifically, as large blocks of memory are subdivided,
* the order in which smaller blocks are delivered depends on the order
* they're subdivided in this function. This is the primary factor
* influencing the order in which pages are delivered to the IO
* subsystem according to empirical testing, and this is also justified
* by considering the behavior of a buddy system containing a single
* large block of memory acted on by a series of small allocations.
* This behavior is a critical factor in sglist merging's success.
*
* -- wli
*/
static inline void expand(struct zone *zone, struct page *page,
int low, int high, struct free_area *area,
int migratetype)
{
unsigned long size = 1 << high;
while (high > low) {
area--;
high--;
size >>= 1;
list_add(&page[size].lru, &area->free_list[migratetype]);
area->nr_free++;
set_page_order(&page[size], high);
}
}
/*
* This page is about to be returned from the page allocator
*/
static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
Nick Piggin
committed
(atomic_read(&page->_count) != 0) |
(page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
if (gfp_flags & __GFP_ZERO)
prep_zero_page(page, order, gfp_flags);
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
Mel Gorman
committed
/*
* Go through the free lists for the given migratetype and remove
* the smallest available page from the freelists
*/
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
Mel Gorman
committed
int migratetype)
{
unsigned int current_order;
struct free_area * area;
struct page *page;
/* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
area = &(zone->free_area[current_order]);
if (list_empty(&area->free_list[migratetype]))
continue;
page = list_entry(area->free_list[migratetype].next,
struct page, lru);
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
expand(zone, page, order, current_order, area, migratetype);
return page;
}
return NULL;
}
/*
* This array describes the order lists are fallen back to when
* the free lists for the desirable migrate type are depleted
*/
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
[MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
[MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */
/*
* Move the free pages in a range to the free lists of the requested type.
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
static int move_freepages(struct zone *zone,
struct page *start_page, struct page *end_page,
int migratetype)
{
struct page *page;
unsigned long order;
int pages_moved = 0;
#ifndef CONFIG_HOLES_IN_ZONE
/*
* page_zone is not safe to call in this context when
* CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
* anyway as we check zone boundaries in move_freepages_block().
* Remove at a later date when no bug reports exist related to
*/
BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif
for (page = start_page; page <= end_page;) {
/* Make sure we are not inadvertently changing nodes */
VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
continue;
}
if (!PageBuddy(page)) {
page++;
continue;
}
order = page_order(page);
list_del(&page->lru);
list_add(&page->lru,
&zone->free_area[order].free_list[migratetype]);
page += 1 << order;
pages_moved += 1 << order;
return pages_moved;
static int move_freepages_block(struct zone *zone, struct page *page,
int migratetype)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
start_pfn = page_to_pfn(page);
start_pfn = start_pfn & ~(pageblock_nr_pages-1);
start_page = pfn_to_page(start_pfn);
end_page = start_page + pageblock_nr_pages - 1;
end_pfn = start_pfn + pageblock_nr_pages - 1;
/* Do not cross zone boundaries */
if (start_pfn < zone->zone_start_pfn)
start_page = page;
if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
return 0;
return move_freepages(zone, start_page, end_page, migratetype);
}
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
struct free_area * area;
int current_order;
struct page *page;
int migratetype, i;
/* Find the largest possible block of pages in the other list */
for (current_order = MAX_ORDER-1; current_order >= order;
--current_order) {
for (i = 0; i < MIGRATE_TYPES - 1; i++) {
migratetype = fallbacks[start_migratetype][i];
Mel Gorman
committed
/* MIGRATE_RESERVE handled later if necessary */
if (migratetype == MIGRATE_RESERVE)
continue;
area = &(zone->free_area[current_order]);
if (list_empty(&area->free_list[migratetype]))
continue;
page = list_entry(area->free_list[migratetype].next,
struct page, lru);
area->nr_free--;
/*
* If breaking a large block of pages, move all free
* pages to the preferred allocation list. If falling
* back for a reclaimable kernel allocation, be more
* agressive about taking ownership of free pages
if (unlikely(current_order >= (pageblock_order >> 1)) ||
start_migratetype == MIGRATE_RECLAIMABLE) {
unsigned long pages;
pages = move_freepages_block(zone, page,
start_migratetype);
/* Claim the whole block if over half of it is free */
if (pages >= (1 << (pageblock_order-1)))
set_pageblock_migratetype(page,
start_migratetype);
migratetype = start_migratetype;
/* Remove the page from the freelists */
list_del(&page->lru);
rmv_page_order(page);
if (current_order == pageblock_order)
set_pageblock_migratetype(page,
start_migratetype);
expand(zone, page, order, current_order, area, migratetype);
return page;
}
}
Mel Gorman
committed
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
int migratetype)
Mel Gorman
committed
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
Mel Gorman
committed
page = __rmqueue_fallback(zone, order, migratetype);
/*
* Use MIGRATE_RESERVE rather than fail an allocation. goto
* is used because __rmqueue_smallest is an inline function
* and we want just one call site
*/
if (!page) {
migratetype = MIGRATE_RESERVE;
goto retry_reserve;
}
}
}
/*
* Obtain a specified number of elements from the buddy allocator, all under
* a single hold of the lock, for efficiency. Add them to the supplied list.
* Returns the number of new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype)
struct page *page = __rmqueue(zone, order, migratetype);
/*
* Split buddy pages returned by expand() are received here
* in physical page order. The page is added to the callers and
* list and the list head then moves forward. From the callers
* perspective, the linked list is ordered by page number in
* some conditions. This is useful for IO devices that can
* merge IO requests if the physical pages are ordered
* properly.
*/
list_add(&page->lru, list);
set_page_private(page, migratetype);
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
Christoph Lameter
committed
/*
* Called from the vmstat counter updater to drain pagesets of this
* currently executing processor on remote nodes after they have
* expired.
*
* Note that this function must be called with the thread pinned to
* a single processor.
Christoph Lameter
committed
*/
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
local_irq_save(flags);
if (pcp->count >= pcp->batch)
to_drain = pcp->batch;
else
to_drain = pcp->count;
free_pages_bulk(zone, to_drain, &pcp->list, 0);
pcp->count -= to_drain;
local_irq_restore(flags);
/*
* Drain pages of the indicated processor.
*
* The processor must either be the current processor and the
* thread pinned to the current processor or a processor that
* is not online.
*/
static void drain_pages(unsigned int cpu)
for_each_populated_zone(zone) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp;
local_irq_save(flags);
free_pages_bulk(zone, pcp->count, &pcp->list, 0);
pcp->count = 0;
local_irq_restore(flags);
/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
*/
void drain_local_pages(void *arg)
{
drain_pages(smp_processor_id());
}
/*
* Spill all the per-cpu pages from all CPUs back into the buddy allocator
*/
void drain_all_pages(void)
{
on_each_cpu(drain_local_pages, NULL, 1);
#ifdef CONFIG_HIBERNATION
unsigned long pfn, max_zone_pfn;
unsigned long flags;
struct list_head *curr;
if (!zone->spanned_pages)
return;
spin_lock_irqsave(&zone->lock, flags);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!swsusp_page_is_forbidden(page))
swsusp_unset_page_free(page);
for_each_migratetype_order(order, t) {
list_for_each(curr, &zone->free_area[order].free_list[t]) {