Skip to content
Snippets Groups Projects
page_alloc.c 62.9 KiB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
/*
 *  linux/mm/page_alloc.c
 *
 *  Manages the free list, the system allocates free pages here.
 *  Note that kmalloc() lives in slab.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
 *          (lots of bits borrowed from Ingo Molnar & Andrew Morton)
 */

#include <linux/config.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
Linus Torvalds's avatar
Linus Torvalds committed

#include <asm/tlbflush.h>
#include <asm/div64.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include "internal.h"

/*
 * MCD - HACK: Find somewhere to initialize this EARLY, or make this
 * initializer cleaner
 */
nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
long nr_swap_pages;
int percpu_pagelist_fraction;
Linus Torvalds's avatar
Linus Torvalds committed

static void __free_pages_ok(struct page *page, unsigned int order);
Linus Torvalds's avatar
Linus Torvalds committed
/*
 * results with 256, 32 in the lowmem_reserve sysctl:
 *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
 *	1G machine -> (16M dma, 784M normal, 224M high)
 *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
 *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
 *	HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
 *
 * TBD: should special case ZONE_DMA32 machines here - in those we normally
 * don't need any ZONE_NORMAL reservation
Linus Torvalds's avatar
Linus Torvalds committed
 */
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
Linus Torvalds's avatar
Linus Torvalds committed

EXPORT_SYMBOL(totalram_pages);

/*
 * Used by page_zone() to look up the address of the struct zone whose
 * id is encoded in the upper bits of page->flags
 */
struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed
EXPORT_SYMBOL(zone_table);

static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
Linus Torvalds's avatar
Linus Torvalds committed
int min_free_kbytes = 1024;

unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages;
Linus Torvalds's avatar
Linus Torvalds committed

#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
Linus Torvalds's avatar
Linus Torvalds committed
{
	int ret = 0;
	unsigned seq;
	unsigned long pfn = page_to_pfn(page);
	do {
		seq = zone_span_seqbegin(zone);
		if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
			ret = 1;
		else if (pfn < zone->zone_start_pfn)
			ret = 1;
	} while (zone_span_seqretry(zone, seq));

	return ret;
}

static int page_is_consistent(struct zone *zone, struct page *page)
{
Linus Torvalds's avatar
Linus Torvalds committed
#ifdef CONFIG_HOLES_IN_ZONE
	if (!pfn_valid(page_to_pfn(page)))
Linus Torvalds's avatar
Linus Torvalds committed
#endif
	if (zone != page_zone(page))
		return 0;

	return 1;
}
/*
 * Temporary debugging check for pages not lying within a given zone.
 */
static int bad_range(struct zone *zone, struct page *page)
{
	if (page_outside_zone_boundaries(zone, page))
Linus Torvalds's avatar
Linus Torvalds committed
		return 1;
	if (!page_is_consistent(zone, page))
		return 1;

Linus Torvalds's avatar
Linus Torvalds committed
	return 0;
}

#else
static inline int bad_range(struct zone *zone, struct page *page)
{
	return 0;
}
#endif

static void bad_page(struct page *page)
Linus Torvalds's avatar
Linus Torvalds committed
{
	printk(KERN_EMERG "Bad page state in process '%s'\n"
		KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
		KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
		KERN_EMERG "Backtrace:\n",
		current->comm, page, (int)(2*sizeof(unsigned long)),
		(unsigned long)page->flags, page->mapping,
		page_mapcount(page), page_count(page));
Linus Torvalds's avatar
Linus Torvalds committed
	dump_stack();
	page->flags &= ~(1 << PG_lru	|
			1 << PG_private |
Linus Torvalds's avatar
Linus Torvalds committed
			1 << PG_locked	|
			1 << PG_active	|
			1 << PG_dirty	|
			1 << PG_reclaim |
			1 << PG_slab    |
Linus Torvalds's avatar
Linus Torvalds committed
			1 << PG_swapcache |
Linus Torvalds's avatar
Linus Torvalds committed
	set_page_count(page, 0);
	reset_page_mapcount(page);
	page->mapping = NULL;
	add_taint(TAINT_BAD_PAGE);
Linus Torvalds's avatar
Linus Torvalds committed
}

/*
 * Higher-order pages are called "compound pages".  They are structured thusly:
 *
 * The first PAGE_SIZE page is called the "head page".
 *
 * The remaining PAGE_SIZE pages are called "tail pages".
 *
 * All pages have PG_compound set.  All pages have their ->private pointing at
 * the head page (even the head page has this).
 *
 * The first tail page's ->lru.next holds the address of the compound page's
 * put_page() function.  Its ->lru.prev holds the order of allocation.
 * This usage means that zero-order pages may not be compound.
Linus Torvalds's avatar
Linus Torvalds committed
 */

static void free_compound_page(struct page *page)
{
	__free_pages_ok(page, (unsigned long)page[1].lru.prev);
}

Linus Torvalds's avatar
Linus Torvalds committed
static void prep_compound_page(struct page *page, unsigned long order)
{
	int i;
	int nr_pages = 1 << order;

	page[1].lru.next = (void *)free_compound_page;	/* set dtor */
	page[1].lru.prev = (void *)order;
Linus Torvalds's avatar
Linus Torvalds committed
	for (i = 0; i < nr_pages; i++) {
		struct page *p = page + i;

		__SetPageCompound(p);
		set_page_private(p, (unsigned long)page);
Linus Torvalds's avatar
Linus Torvalds committed
	}
}

static void destroy_compound_page(struct page *page, unsigned long order)
Loading
Loading full blame...