Newer
Older
* of contiguous memory as there's no hardware page translation to
* assemble apparent contiguous memory from discontiguous pages.
*
* Queueing large contiguous runs of pages for batching, however,
* causes the pages to actually be freed in smaller chunks. As there
* can be a significant delay between the individual batches being
* recycled, this leads to the once large chunks of space being
* fragmented and becoming unavailable for high-order allocations.
*/
return 0;
#endif
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
{
struct per_cpu_pages *pcp;
memset(p, 0, sizeof(*p));
pcp->count = 0;
pcp->high = 6 * batch;
pcp->batch = max(1UL, 1 * batch);
INIT_LIST_HEAD(&pcp->list);
}
/*
* setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
* to the value high for the pageset p.
*/
static void setup_pagelist_highmark(struct per_cpu_pageset *p,
unsigned long high)
{
struct per_cpu_pages *pcp;
pcp->high = high;
pcp->batch = max(1UL, high/4);
if ((high/4) > (PAGE_SHIFT * 8))
pcp->batch = PAGE_SHIFT * 8;
}
* Boot pageset table. One per cpu which is going to be used for all
* zones and all nodes. The parameters will be set in such a way
* that an item put on a list will immediately be handed over to
* the buddy list. This is safe since pageset manipulation is done
* with interrupts disabled.
*
* Some NUMA counter updates may also be caught by the boot pagesets.
*
* The boot_pagesets must be kept even after bootup is complete for
* unused processors and/or zones. They do play a role for bootstrapping
* hotplugged processors.
*
* zoneinfo_show() and maybe other functions do
* not check if the processor is online before following the pageset pointer.
* Other parts of the kernel may not check if the zone is available.
static struct per_cpu_pageset boot_pageset[NR_CPUS];
/*
* Dynamically allocate memory for the
* per cpu pageset array in struct zone.
*/
static int __cpuinit process_zones(int cpu)
int node = cpu_to_node(cpu);
node_set_state(node, N_CPU); /* this node has a cpu */
for_each_populated_zone(zone) {
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
if (percpu_pagelist_fraction)
setup_pagelist_highmark(zone_pcp(zone, cpu),
(zone->present_pages / percpu_pagelist_fraction));
}
return 0;
bad:
for_each_zone(dzone) {
if (!populated_zone(dzone))
continue;
zone_pcp(dzone, cpu) = &boot_pageset[cpu];
}
return -ENOMEM;
}
static inline void free_zone_pagesets(int cpu)
{
struct zone *zone;
for_each_zone(zone) {
struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
/* Free per_cpu_pageset if it is slab allocated */
if (pset != &boot_pageset[cpu])
kfree(pset);
zone_pcp(zone, cpu) = &boot_pageset[cpu];
static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
int cpu = (long)hcpu;
int ret = NOTIFY_OK;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
if (process_zones(cpu))
ret = NOTIFY_BAD;
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
free_zone_pagesets(cpu);
break;
default:
break;
Chandra Seetharaman
committed
static struct notifier_block __cpuinitdata pageset_notifier =
{ &pageset_cpuup_callback, NULL, 0 };
void __init setup_per_cpu_pageset(void)
{
int err;
/* Initialize per_cpu_pageset for cpu 0.
* A cpuup callback will do this for every cpu
* as it comes online
*/
err = process_zones(smp_processor_id());
BUG_ON(err);
register_cpu_notifier(&pageset_notifier);
}
#endif
Yasunori Goto
committed
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{
int i;
struct pglist_data *pgdat = zone->zone_pgdat;
Yasunori Goto
committed
size_t alloc_size;
/*
* The per-page waitqueue mechanism uses hashed waitqueues
* per zone.
*/
Yasunori Goto
committed
zone->wait_table_hash_nr_entries =
wait_table_hash_nr_entries(zone_size_pages);
zone->wait_table_bits =
wait_table_bits(zone->wait_table_hash_nr_entries);
Yasunori Goto
committed
alloc_size = zone->wait_table_hash_nr_entries
* sizeof(wait_queue_head_t);
if (!slab_is_available()) {
Yasunori Goto
committed
zone->wait_table = (wait_queue_head_t *)
alloc_bootmem_node(pgdat, alloc_size);
} else {
/*
* This case means that a zone whose size was 0 gets new memory
* via memory hot-add.
* But it may be the case that a new node was hot-added. In
* this case vmalloc() will not be able to use this new node's
* memory - this wait_table must be initialized to use this new
* node itself as well.
* To use this new node's memory, further consideration will be
* necessary.
*/
zone->wait_table = vmalloc(alloc_size);
Yasunori Goto
committed
}
if (!zone->wait_table)
return -ENOMEM;
Yasunori Goto
committed
for(i = 0; i < zone->wait_table_hash_nr_entries; ++i)
init_waitqueue_head(zone->wait_table + i);
Yasunori Goto
committed
return 0;
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
static int __zone_pcp_update(void *data)
{
struct zone *zone = data;
int cpu;
unsigned long batch = zone_batchsize(zone), flags;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
pset = zone_pcp(zone, cpu);
pcp = &pset->pcp;
local_irq_save(flags);
free_pages_bulk(zone, pcp->count, &pcp->list, 0);
setup_pageset(pset, batch);
local_irq_restore(flags);
}
return 0;
}
void zone_pcp_update(struct zone *zone)
{
stop_machine(__zone_pcp_update, zone, NULL);
}
static __meminit void zone_pcp_init(struct zone *zone)
{
int cpu;
unsigned long batch = zone_batchsize(zone);
for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_NUMA
/* Early boot. Slab allocator not functional yet */
setup_pageset(&boot_pageset[cpu],0);
#else
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
if (zone->present_pages)
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
zone->name, zone->present_pages, batch);
Yasunori Goto
committed
__meminit int init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
unsigned long size,
enum memmap_context context)
{
struct pglist_data *pgdat = zone->zone_pgdat;
Yasunori Goto
committed
int ret;
ret = zone_wait_table_init(zone, size);
if (ret)
return ret;
pgdat->nr_zones = zone_idx(zone) + 1;
zone->zone_start_pfn = zone_start_pfn;
mminit_dprintk(MMINIT_TRACE, "memmap_init",
"Initialising map node %d zone %lu pfns %lu -> %lu\n",
pgdat->node_id,
(unsigned long)zone_idx(zone),
zone_start_pfn, (zone_start_pfn + size));
zone_init_free_lists(zone);
Yasunori Goto
committed
return 0;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/*
* Basic iterator support. Return the first range of PFNs for a node
* Note: nid == MAX_NUMNODES returns first region regardless of node
*/
static int __meminit first_active_region_index_in_nid(int nid)
{
int i;
for (i = 0; i < nr_nodemap_entries; i++)
if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
return i;
return -1;
}
/*
* Basic iterator support. Return the next active range of PFNs for a node
* Note: nid == MAX_NUMNODES returns next region regardless of node
static int __meminit next_active_region_index_in_nid(int index, int nid)
{
for (index = index + 1; index < nr_nodemap_entries; index++)
if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
return index;
return -1;
}
#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
/*
* Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
* Architectures may implement their own version but if add_active_range()
* was used and there are no special requirements, this is a convenient
* alternative
*/
int __meminit __early_pfn_to_nid(unsigned long pfn)
{
int i;
for (i = 0; i < nr_nodemap_entries; i++) {
unsigned long start_pfn = early_node_map[i].start_pfn;
unsigned long end_pfn = early_node_map[i].end_pfn;
if (start_pfn <= pfn && pfn < end_pfn)
return early_node_map[i].nid;
}
/* This is a memory hole */
return -1;
}
#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
int __meminit early_pfn_to_nid(unsigned long pfn)
{
int nid;
nid = __early_pfn_to_nid(pfn);
if (nid >= 0)
return nid;
/* just returns 0 */
return 0;
#ifdef CONFIG_NODES_SPAN_OTHER_NODES
bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
{
int nid;
nid = __early_pfn_to_nid(pfn);
if (nid >= 0 && nid != node)
return false;
return true;
}
#endif
/* Basic iterator support to walk early_node_map[] */
#define for_each_active_range_index_in_nid(i, nid) \
for (i = first_active_region_index_in_nid(nid); i != -1; \
i = next_active_region_index_in_nid(i, nid))
/**
* free_bootmem_with_active_regions - Call free_bootmem_node for each active range
* @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
* @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
*
* If an architecture guarantees that all ranges registered with
* add_active_ranges() contain no holes and may be freed, this
* this function may be used instead of calling free_bootmem() manually.
*/
void __init free_bootmem_with_active_regions(int nid,
unsigned long max_low_pfn)
{
int i;
for_each_active_range_index_in_nid(i, nid) {
unsigned long size_pages = 0;
unsigned long end_pfn = early_node_map[i].end_pfn;
if (early_node_map[i].start_pfn >= max_low_pfn)
continue;
if (end_pfn > max_low_pfn)
end_pfn = max_low_pfn;
size_pages = end_pfn - early_node_map[i].start_pfn;
free_bootmem_node(NODE_DATA(early_node_map[i].nid),
PFN_PHYS(early_node_map[i].start_pfn),
size_pages << PAGE_SHIFT);
}
}
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
for_each_active_range_index_in_nid(i, nid) {
ret = work_fn(early_node_map[i].start_pfn,
early_node_map[i].end_pfn, data);
if (ret)
break;
}
/**
* sparse_memory_present_with_active_regions - Call memory_present for each active range
* @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
*
* If an architecture guarantees that all ranges registered with
* add_active_ranges() contain no holes and may be freed, this
* function may be used instead of calling memory_present() manually.
*/
void __init sparse_memory_present_with_active_regions(int nid)
{
int i;
for_each_active_range_index_in_nid(i, nid)
memory_present(early_node_map[i].nid,
early_node_map[i].start_pfn,
early_node_map[i].end_pfn);
}
/**
* get_pfn_range_for_nid - Return the start and end page frames for a node
* @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
* @start_pfn: Passed by reference. On return, it will have the node start_pfn.
* @end_pfn: Passed by reference. On return, it will have the node end_pfn.
*
* It returns the start and end page frame of a node based on information
* provided by an arch calling add_active_range(). If called for a node
* with no available memory, a warning is printed and the start and end
* PFNs will be 0.
void __meminit get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn)
{
int i;
*start_pfn = -1UL;
*end_pfn = 0;
for_each_active_range_index_in_nid(i, nid) {
*start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
}
if (*start_pfn == -1UL)
*start_pfn = 0;
}
/*
* This finds a zone that can be used for ZONE_MOVABLE pages. The
* assumption is made that zones within a node are ordered in monotonic
* increasing memory addresses so that the "highest" populated zone is used
*/
static void __init find_usable_zone_for_movable(void)
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
{
int zone_index;
for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
if (zone_index == ZONE_MOVABLE)
continue;
if (arch_zone_highest_possible_pfn[zone_index] >
arch_zone_lowest_possible_pfn[zone_index])
break;
}
VM_BUG_ON(zone_index == -1);
movable_zone = zone_index;
}
/*
* The zone ranges provided by the architecture do not include ZONE_MOVABLE
* because it is sized independant of architecture. Unlike the other zones,
* the starting point for ZONE_MOVABLE is not fixed. It may be different
* in each node depending on the size of each node and how evenly kernelcore
* is distributed. This helper function adjusts the zone ranges
* provided by the architecture for a given node by using the end of the
* highest usable zone for ZONE_MOVABLE. This preserves the assumption that
* zones within a node are in order of monotonic increases memory addresses
*/
static void __meminit adjust_zone_range_for_zone_movable(int nid,
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
unsigned long zone_type,
unsigned long node_start_pfn,
unsigned long node_end_pfn,
unsigned long *zone_start_pfn,
unsigned long *zone_end_pfn)
{
/* Only adjust if ZONE_MOVABLE is on this node */
if (zone_movable_pfn[nid]) {
/* Size ZONE_MOVABLE */
if (zone_type == ZONE_MOVABLE) {
*zone_start_pfn = zone_movable_pfn[nid];
*zone_end_pfn = min(node_end_pfn,
arch_zone_highest_possible_pfn[movable_zone]);
/* Adjust for ZONE_MOVABLE starting within this range */
} else if (*zone_start_pfn < zone_movable_pfn[nid] &&
*zone_end_pfn > zone_movable_pfn[nid]) {
*zone_end_pfn = zone_movable_pfn[nid];
/* Check if this whole range is within ZONE_MOVABLE */
} else if (*zone_start_pfn >= zone_movable_pfn[nid])
*zone_start_pfn = *zone_end_pfn;
}
}
/*
* Return the number of pages a zone spans in a node, including holes
* present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
*/
static unsigned long __meminit zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *ignored)
{
unsigned long node_start_pfn, node_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
/* Get the start and end of the node and zone */
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
adjust_zone_range_for_zone_movable(nid, zone_type,
node_start_pfn, node_end_pfn,
&zone_start_pfn, &zone_end_pfn);
/* Check that this node has pages within the zone's required range */
if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
return 0;
/* Move the zone boundaries inside the node if necessary */
zone_end_pfn = min(zone_end_pfn, node_end_pfn);
zone_start_pfn = max(zone_start_pfn, node_start_pfn);
/* Return the spanned pages */
return zone_end_pfn - zone_start_pfn;
}
/*
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
static unsigned long __meminit __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
int i = 0;
unsigned long prev_end_pfn = 0, hole_pages = 0;
unsigned long start_pfn;
/* Find the end_pfn of the first active range of pfns in the node */
i = first_active_region_index_in_nid(nid);
if (i == -1)
return 0;
prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
/* Account for ranges before physical memory on this node */
if (early_node_map[i].start_pfn > range_start_pfn)
hole_pages = prev_end_pfn - range_start_pfn;
/* Find all holes for the zone within the node */
for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
/* No need to continue if prev_end_pfn is outside the zone */
if (prev_end_pfn >= range_end_pfn)
break;
/* Make sure the end of the zone is not within the hole */
start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
prev_end_pfn = max(prev_end_pfn, range_start_pfn);
/* Update the hole size cound and move on */
if (start_pfn > range_start_pfn) {
BUG_ON(prev_end_pfn > start_pfn);
hole_pages += start_pfn - prev_end_pfn;
}
prev_end_pfn = early_node_map[i].end_pfn;
}
/* Account for ranges past physical memory on this node */
if (range_end_pfn > prev_end_pfn)
hole_pages += range_end_pfn -
max(range_start_pfn, prev_end_pfn);
return hole_pages;
}
/**
* absent_pages_in_range - Return number of page frames in holes within a range
* @start_pfn: The start PFN to start searching for holes
* @end_pfn: The end PFN to stop searching for holes
*
* It returns the number of pages frames in memory holes within a range.
*/
unsigned long __init absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn)
{
return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
}
/* Return the number of page frames in holes in a zone on a node */
static unsigned long __meminit zone_absent_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *ignored)
{
unsigned long node_start_pfn, node_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
node_start_pfn);
zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
node_end_pfn);
adjust_zone_range_for_zone_movable(nid, zone_type,
node_start_pfn, node_end_pfn,
&zone_start_pfn, &zone_end_pfn);
return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
#else
static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *zones_size)
{
return zones_size[zone_type];
}
static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *zholes_size)
{
if (!zholes_size)
return 0;
return zholes_size[zone_type];
}
#endif
static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
unsigned long realtotalpages, totalpages = 0;
enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++)
totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
zones_size);
pgdat->node_spanned_pages = totalpages;
realtotalpages = totalpages;
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -=
zone_absent_pages_in_node(pgdat->node_id, i,
zholes_size);
pgdat->node_present_pages = realtotalpages;
printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
realtotalpages);
}
#ifndef CONFIG_SPARSEMEM
/*
* Calculate the size of the zone->blockflags rounded to an unsigned long
* Start by making sure zonesize is a multiple of pageblock_order by rounding
* up. Then use 1 NR_PAGEBLOCK_BITS worth of bits per pageblock, finally
* round what is now in bits to nearest long in bits, then return it in
* bytes.
*/
static unsigned long __init usemap_size(unsigned long zonesize)
{
unsigned long usemapsize;
usemapsize = roundup(zonesize, pageblock_nr_pages);
usemapsize = usemapsize >> pageblock_order;
usemapsize *= NR_PAGEBLOCK_BITS;
usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
return usemapsize / 8;
}
static void __init setup_usemap(struct pglist_data *pgdat,
struct zone *zone, unsigned long zonesize)
{
unsigned long usemapsize = usemap_size(zonesize);
zone->pageblock_flags = NULL;
if (usemapsize)
zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
}
#else
static void inline setup_usemap(struct pglist_data *pgdat,
struct zone *zone, unsigned long zonesize) {}
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
/* Return a sensible default order for the pageblock size. */
static inline int pageblock_default_order(void)
{
if (HPAGE_SHIFT > PAGE_SHIFT)
return HUGETLB_PAGE_ORDER;
return MAX_ORDER-1;
}
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
static inline void __init set_pageblock_order(unsigned int order)
{
/* Check that pageblock_nr_pages has not already been setup */
if (pageblock_order)
return;
/*
* Assume the largest contiguous order of interest is a huge page.
* This value may be variable depending on boot parameters on IA64
*/
pageblock_order = order;
}
#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
/*
* When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order()
* and pageblock_default_order() are unused as pageblock_order is set
* at compile-time. See include/linux/pageblock-flags.h for the values of
* pageblock_order based on the kernel config
*/
static inline int pageblock_default_order(unsigned int order)
{
return MAX_ORDER-1;
}
#define set_pageblock_order(x) do {} while (0)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
/*
* Set up the zone data structures:
* - mark all pages reserved
* - mark all memory queues empty
* - clear the memory bitmaps
*/
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
Christoph Lameter
committed
enum zone_type j;
int nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
Yasunori Goto
committed
int ret;
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
pgdat->kswapd_max_order = 0;
pgdat_page_cgroup_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, memmap_pages;
size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j,
zholes_size);
/*
* Adjust realsize so that it accounts for how much memory
* is used by this zone for memmap. This affects the watermark
* and per-cpu initialisations
*/
memmap_pages =
PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
if (realsize >= memmap_pages) {
realsize -= memmap_pages;
if (memmap_pages)
printk(KERN_DEBUG
" %s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
" %s zone: %lu pages exceeds realsize %lu\n",
zone_names[j], memmap_pages, realsize);
Christoph Lameter
committed
/* Account for reserved pages */
if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
Christoph Lameter
committed
zone_names[0], dma_reserve);
}
if (!is_highmem_idx(j))
nr_kernel_pages += realsize;
nr_all_pages += realsize;
zone->spanned_pages = size;
zone->present_pages = realsize;
Christoph Lameter
committed
#ifdef CONFIG_NUMA
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
Christoph Lameter
committed
/ 100;
zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
Christoph Lameter
committed
#endif
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->prev_priority = DEF_PRIORITY;
zone_pcp_init(zone);
for_each_lru(l) {
INIT_LIST_HEAD(&zone->lru[l].list);
zone->reclaim_stat.recent_rotated[0] = 0;
zone->reclaim_stat.recent_rotated[1] = 0;
zone->reclaim_stat.recent_scanned[0] = 0;
zone->reclaim_stat.recent_scanned[1] = 0;
zap_zone_vm_stats(zone);
set_pageblock_order(pageblock_default_order());
setup_usemap(pgdat, zone, size);
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
Yasunori Goto
committed
BUG_ON(ret);
memmap_init(size, nid, j, zone_start_pfn);
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
/* Skip empty nodes */
if (!pgdat->node_spanned_pages)
return;
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
unsigned long size, start, end;
/*
* The zone's endpoints aren't required to be MAX_ORDER
* aligned but the node_mem_map endpoints must be in order
* for the buddy allocator to function correctly.
*/
start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
end = ALIGN(end, MAX_ORDER_NR_PAGES);
size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
/*
* With no DISCONTIG, the global mem_map is just set as node 0's
*/
if (pgdat == NODE_DATA(0)) {
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
}
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
unsigned long node_start_pfn, unsigned long *zholes_size)
pg_data_t *pgdat = NODE_DATA(nid);
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
calculate_node_totalpages(pgdat, zones_size, zholes_size);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
nid, (unsigned long)pgdat,
(unsigned long)pgdat->node_mem_map);
#endif
free_area_init_core(pgdat, zones_size, zholes_size);
}
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
#if MAX_NUMNODES > 1
/*
* Figure out the number of possible node ids.
*/
static void __init setup_nr_node_ids(void)
{
unsigned int node;
unsigned int highest = 0;
for_each_node_mask(node, node_possible_map)
highest = node;
nr_node_ids = highest + 1;
}
#else
static inline void setup_nr_node_ids(void)
{
}
#endif
/**
* add_active_range - Register a range of PFNs backed by physical memory
* @nid: The node ID the range resides on
* @start_pfn: The start PFN of the available physical memory
* @end_pfn: The end PFN of the available physical memory
*
* These ranges are stored in an early_node_map[] and later used by
* free_area_init_nodes() to calculate zone sizes and holes. If the
* range spans a memory hole, it is up to the architecture to ensure
* the memory is not freed by the bootmem allocator. If possible
* the range being registered will be merged with existing ranges.
*/
void __init add_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
int i;
mminit_dprintk(MMINIT_TRACE, "memory_register",
"Entering add_active_range(%d, %#lx, %#lx) "
"%d entries of %d used\n",
nid, start_pfn, end_pfn,
nr_nodemap_entries, MAX_ACTIVE_REGIONS);
mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
/* Merge with existing active regions if possible */
for (i = 0; i < nr_nodemap_entries; i++) {
if (early_node_map[i].nid != nid)
continue;
/* Skip if an existing region covers this new one */
if (start_pfn >= early_node_map[i].start_pfn &&
end_pfn <= early_node_map[i].end_pfn)
return;
/* Merge forward if suitable */
if (start_pfn <= early_node_map[i].end_pfn &&
end_pfn > early_node_map[i].end_pfn) {
early_node_map[i].end_pfn = end_pfn;
return;
}
/* Merge backward if suitable */
if (start_pfn < early_node_map[i].end_pfn &&
end_pfn >= early_node_map[i].start_pfn) {
early_node_map[i].start_pfn = start_pfn;
return;
}
}
/* Check that early_node_map is large enough */
if (i >= MAX_ACTIVE_REGIONS) {
printk(KERN_CRIT "More than %d memory regions, truncating\n",
MAX_ACTIVE_REGIONS);
return;
}
early_node_map[i].nid = nid;
early_node_map[i].start_pfn = start_pfn;
early_node_map[i].end_pfn = end_pfn;
nr_nodemap_entries = i + 1;
}
/**
* remove_active_range - Shrink an existing registered range of PFNs
* @nid: The node id the range is on that should be shrunk
* @start_pfn: The new PFN of the range
* @end_pfn: The new PFN of the range
*
* i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
* The map is kept near the end physical page range that has already been
* registered. This function allows an arch to shrink an existing registered
* range.
void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
unsigned long end_pfn)
int i, j;
int removed = 0;
printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
nid, start_pfn, end_pfn);
/* Find the old active region end and shrink */
for_each_active_range_index_in_nid(i, nid) {
if (early_node_map[i].start_pfn >= start_pfn &&
early_node_map[i].end_pfn <= end_pfn) {
early_node_map[i].start_pfn = 0;
early_node_map[i].end_pfn = 0;
removed = 1;
continue;
}
if (early_node_map[i].start_pfn < start_pfn &&
early_node_map[i].end_pfn > start_pfn) {
unsigned long temp_end_pfn = early_node_map[i].end_pfn;
early_node_map[i].end_pfn = start_pfn;