Newer
Older
/*
* linux/mm/memory_hotplug.c
*
* Copyright (C)
*/
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/pagevec.h>
Chandra Seetharaman
committed
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memremap.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
#include <linux/hugetlb.h>
#include <linux/bootmem.h>
#include <asm/tlbflush.h>
/*
* online_page_callback contains pointer to current page onlining function.
* Initially it is generic_online_page(). If it is required it could be
* changed by calling set_online_page_callback() for callback registration
* and restore_online_page_callback() for generic callback restore.
*/
static void generic_online_page(struct page *page);
static online_page_callback_t online_page_callback = generic_online_page;
static DEFINE_MUTEX(online_page_callback_lock);
DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
void get_online_mems(void)
{
percpu_down_read(&mem_hotplug_lock);
}
void put_online_mems(void)
{
percpu_up_read(&mem_hotplug_lock);
}
bool movable_node_enabled = false;
#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
bool memhp_auto_online;
#else
bool memhp_auto_online = true;
#endif
EXPORT_SYMBOL_GPL(memhp_auto_online);
static int __init setup_memhp_default_state(char *str)
{
if (!strcmp(str, "online"))
memhp_auto_online = true;
else if (!strcmp(str, "offline"))
memhp_auto_online = false;
return 1;
}
__setup("memhp_default_state=", setup_memhp_default_state);
void mem_hotplug_begin(void)
cpus_read_lock();
percpu_down_write(&mem_hotplug_lock);
void mem_hotplug_done(void)
percpu_up_write(&mem_hotplug_lock);
cpus_read_unlock();
/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
{
struct resource *res, *conflict;
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (!res)
return ERR_PTR(-ENOMEM);
res->name = "System RAM";
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
conflict = request_resource_conflict(&iomem_resource, res);
if (conflict) {
if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
pr_debug("Device unaddressable memory block "
"memory hotplug at %#010llx !\n",
(unsigned long long)start);
}
pr_debug("System RAM resource %pR cannot be added\n", res);
kfree(res);
return ERR_PTR(-EEXIST);
}
return res;
}
static void release_memory_resource(struct resource *res)
{
if (!res)
return;
release_resource(res);
kfree(res);
return;
}
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
Yasuaki Ishimatsu
committed
void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type)
Yasuaki Ishimatsu
committed
page->freelist = (void *)type;
SetPagePrivate(page);
set_page_private(page, info);
void put_page_bootmem(struct page *page)
Yasuaki Ishimatsu
committed
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
Yasuaki Ishimatsu
committed
page->freelist = NULL;
ClearPagePrivate(page);
set_page_private(page, 0);
free_reserved_page(page);
Yasuaki Ishimatsu
committed
#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
#ifndef CONFIG_SPARSEMEM_VMEMMAP
static void register_page_bootmem_info_section(unsigned long start_pfn)
{
unsigned long *usemap, mapsize, section_nr, i;
struct mem_section *ms;
struct page *page, *memmap;
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
/* Get section's memmap address */
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
/*
* Get page for the memmap's phys address
* XXX: need more consideration for sparse_vmemmap...
*/
page = virt_to_page(memmap);
mapsize = sizeof(struct page) * PAGES_PER_SECTION;
mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
/* remember memmap's page */
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, SECTION_INFO);
Oscar Salvador
committed
usemap = ms->pageblock_flags;
page = virt_to_page(usemap);
mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
Yasuaki Ishimatsu
committed
#else /* CONFIG_SPARSEMEM_VMEMMAP */
static void register_page_bootmem_info_section(unsigned long start_pfn)
{
unsigned long *usemap, mapsize, section_nr, i;
struct mem_section *ms;
struct page *page, *memmap;
section_nr = pfn_to_section_nr(start_pfn);
ms = __nr_to_section(section_nr);
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
Oscar Salvador
committed
usemap = ms->pageblock_flags;
Yasuaki Ishimatsu
committed
page = virt_to_page(usemap);
mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
for (i = 0; i < mapsize; i++, page++)
get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
{
unsigned long i, pfn, end_pfn, nr_pages;
int node = pgdat->node_id;
struct page *page;
nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
page = virt_to_page(pgdat);
for (i = 0; i < nr_pages; i++, page++)
get_page_bootmem(node, page, NODE_INFO);
pfn = pgdat->node_start_pfn;
end_pfn = pgdat_end_pfn(pgdat);
/* register section info */
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
/*
* Some platforms can assign the same pfn to multiple nodes - on
* node0 as well as nodeN. To avoid registering a pfn against
* multiple nodes we check that this pfn does not already
* reside in some other nodes.
if (pfn_valid(pfn) && (early_pfn_to_nid(pfn) == node))
register_page_bootmem_info_section(pfn);
}
Yasuaki Ishimatsu
committed
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
struct vmem_altmap *altmap, bool want_memblock)
if (pfn_valid(phys_start_pfn))
return -EEXIST;
ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
if (ret < 0)
return ret;
if (!want_memblock)
return 0;
return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn));
/*
* Reasonably generic function for adding memory. It is
* expected that archs that support memory hotplug will
* call this function after deciding the zone to which to
* add the new pages.
*/
int __ref __add_pages(int nid, unsigned long phys_start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
bool want_memblock)
{
unsigned long i;
int err = 0;
int start_sec, end_sec;
/* during initialize mem_map, align hot-added range to section */
start_sec = pfn_to_section_nr(phys_start_pfn);
end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
if (altmap) {
/*
* Validate altmap is within bounds of the total request
*/
if (altmap->base_pfn != phys_start_pfn
|| vmem_altmap_offset(altmap) > nr_pages) {
pr_warn_once("memory add fail, invalid altmap\n");
err = -EINVAL;
goto out;
}
altmap->alloc = 0;
}
for (i = start_sec; i <= end_sec; i++) {
err = __add_section(nid, section_nr_to_pfn(i), altmap,
want_memblock);
/*
* EEXIST is finally dealt with by ioresource collision
* check. see add_memory() => register_memory_resource()
* Warning will be printed if there is collision.
*/
if (err && (err != -EEXIST))
break;
err = 0;
}
vmemmap_populate_print_last();
out:
return err;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
YASUAKI ISHIMATSU
committed
static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn)
{
struct mem_section *ms;
for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
ms = __pfn_to_section(start_pfn);
if (unlikely(!valid_section(ms)))
continue;
if (unlikely(pfn_to_nid(start_pfn) != nid))
continue;
if (zone && zone != page_zone(pfn_to_page(start_pfn)))
continue;
return start_pfn;
}
return 0;
}
/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
YASUAKI ISHIMATSU
committed
static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
unsigned long start_pfn,
unsigned long end_pfn)
{
struct mem_section *ms;
unsigned long pfn;
/* pfn is the end pfn of a memory section. */
pfn = end_pfn - 1;
for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
ms = __pfn_to_section(pfn);
if (unlikely(!valid_section(ms)))
continue;
if (unlikely(pfn_to_nid(pfn) != nid))
continue;
if (zone && zone != page_zone(pfn_to_page(pfn)))
continue;
return pfn;
}
return 0;
}
static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long zone_start_pfn = zone->zone_start_pfn;
unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
unsigned long zone_end_pfn = z;
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
unsigned long pfn;
struct mem_section *ms;
int nid = zone_to_nid(zone);
zone_span_writelock(zone);
if (zone_start_pfn == start_pfn) {
/*
* If the section is smallest section in the zone, it need
* shrink zone->zone_start_pfn and zone->zone_spanned_pages.
* In this case, we find second smallest valid mem_section
* for shrinking zone.
*/
pfn = find_smallest_section_pfn(nid, zone, end_pfn,
zone_end_pfn);
if (pfn) {
zone->zone_start_pfn = pfn;
zone->spanned_pages = zone_end_pfn - pfn;
}
} else if (zone_end_pfn == end_pfn) {
/*
* If the section is biggest section in the zone, it need
* shrink zone->spanned_pages.
* In this case, we find second biggest valid mem_section for
* shrinking zone.
*/
pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
start_pfn);
if (pfn)
zone->spanned_pages = pfn - zone_start_pfn + 1;
}
/*
* The section is not biggest or smallest mem_section in the zone, it
* only creates a hole in the zone. So in this case, we need not
* change the zone. But perhaps, the zone has only hole data. Thus
* it check the zone has only hole or not.
*/
pfn = zone_start_pfn;
for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
ms = __pfn_to_section(pfn);
if (unlikely(!valid_section(ms)))
continue;
if (page_zone(pfn_to_page(pfn)) != zone)
continue;
/* If the section is current section, it continues the loop */
if (start_pfn == pfn)
continue;
/* If we find valid section, we have nothing to do */
zone_span_writeunlock(zone);
return;
}
/* The zone has no valid section */
zone->zone_start_pfn = 0;
zone->spanned_pages = 0;
zone_span_writeunlock(zone);
}
static void shrink_pgdat_span(struct pglist_data *pgdat,
unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
unsigned long pgdat_end_pfn = p;
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
unsigned long pfn;
struct mem_section *ms;
int nid = pgdat->node_id;
if (pgdat_start_pfn == start_pfn) {
/*
* If the section is smallest section in the pgdat, it need
* shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
* In this case, we find second smallest valid mem_section
* for shrinking zone.
*/
pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
pgdat_end_pfn);
if (pfn) {
pgdat->node_start_pfn = pfn;
pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
}
} else if (pgdat_end_pfn == end_pfn) {
/*
* If the section is biggest section in the pgdat, it need
* shrink pgdat->node_spanned_pages.
* In this case, we find second biggest valid mem_section for
* shrinking zone.
*/
pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
start_pfn);
if (pfn)
pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
}
/*
* If the section is not biggest or smallest mem_section in the pgdat,
* it only creates a hole in the pgdat. So in this case, we need not
* change the pgdat.
* But perhaps, the pgdat has only hole data. Thus it check the pgdat
* has only hole or not.
*/
pfn = pgdat_start_pfn;
for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
ms = __pfn_to_section(pfn);
if (unlikely(!valid_section(ms)))
continue;
if (pfn_to_nid(pfn) != nid)
continue;
/* If the section is current section, it continues the loop */
if (start_pfn == pfn)
continue;
/* If we find valid section, we have nothing to do */
return;
}
/* The pgdat has no valid section */
pgdat->node_start_pfn = 0;
pgdat->node_spanned_pages = 0;
}
static void __remove_zone(struct zone *zone, unsigned long start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nr_pages = PAGES_PER_SECTION;
unsigned long flags;
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
}
static int __remove_section(struct zone *zone, struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap)
unsigned long start_pfn;
int scn_nr;
int ret = -EINVAL;
if (!valid_section(ms))
return ret;
ret = unregister_memory_section(ms);
if (ret)
return ret;
scn_nr = __section_nr(ms);
YASUAKI ISHIMATSU
committed
start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
__remove_zone(zone, start_pfn);
sparse_remove_one_section(zone, ms, map_offset, altmap);
return 0;
}
/**
* __remove_pages() - remove sections of pages from a zone
* @zone: zone from which pages need to be removed
* @phys_start_pfn: starting pageframe (must be aligned to start of a section)
* @nr_pages: number of pages to remove (must be multiple of section size)
*
* Generic helper function to remove section mappings and sysfs entries
* for the section of the memory we are removing. Caller needs to make
* sure that pages are marked reserved and zones are adjust properly by
* calling offline_pages().
*/
int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
unsigned long i;
unsigned long map_offset = 0;
int sections_to_remove, ret = 0;
/* In the ZONE_DEVICE case device driver owns the memory region */
if (is_dev_zone(zone)) {
if (altmap)
map_offset = vmem_altmap_offset(altmap);
} else {
resource_size_t start, size;
start = phys_start_pfn << PAGE_SHIFT;
size = nr_pages * PAGE_SIZE;
ret = release_mem_region_adjustable(&iomem_resource, start,
size);
if (ret) {
resource_size_t endres = start + size - 1;
pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
&start, &endres, ret);
}
}
clear_zone_contiguous(zone);
/*
* We can only remove entire sections
*/
BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
BUG_ON(nr_pages % PAGES_PER_SECTION);
sections_to_remove = nr_pages / PAGES_PER_SECTION;
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
altmap);
map_offset = 0;
if (ret)
break;
}
set_zone_contiguous(zone);
return ret;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
int set_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
get_online_mems();
mutex_lock(&online_page_callback_lock);
if (online_page_callback == generic_online_page) {
online_page_callback = callback;
rc = 0;
}
mutex_unlock(&online_page_callback_lock);
put_online_mems();
return rc;
}
EXPORT_SYMBOL_GPL(set_online_page_callback);
int restore_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
get_online_mems();
mutex_lock(&online_page_callback_lock);
if (online_page_callback == callback) {
online_page_callback = generic_online_page;
rc = 0;
}
mutex_unlock(&online_page_callback_lock);
put_online_mems();
return rc;
}
EXPORT_SYMBOL_GPL(restore_online_page_callback);
void __online_page_set_limits(struct page *page)
}
EXPORT_SYMBOL_GPL(__online_page_set_limits);
void __online_page_increment_counters(struct page *page)
{
adjust_managed_page_count(page, 1);
}
EXPORT_SYMBOL_GPL(__online_page_increment_counters);
void __online_page_free(struct page *page)
{
EXPORT_SYMBOL_GPL(__online_page_free);
static void generic_online_page(struct page *page)
{
__online_page_set_limits(page);
__online_page_increment_counters(page);
__online_page_free(page);
}
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg)
{
unsigned long i;
unsigned long onlined_pages = *(unsigned long *)arg;
struct page *page;
if (PageReserved(pfn_to_page(start_pfn)))
for (i = 0; i < nr_pages; i++) {
page = pfn_to_page(start_pfn + i);
(*online_page_callback)(page);
online_mem_sections(start_pfn, start_pfn + nr_pages);
*(unsigned long *)arg = onlined_pages;
return 0;
}
/* check which state of node_states will be changed when online memory */
static void node_states_check_changes_online(unsigned long nr_pages,
struct zone *zone, struct memory_notify *arg)
{
int nid = zone_to_nid(zone);
enum zone_type zone_last = ZONE_NORMAL;
/*
* If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
* contains nodes which have zones of 0...ZONE_NORMAL,
* set zone_last to ZONE_NORMAL.
* If we don't have HIGHMEM nor movable node,
* node_states[N_NORMAL_MEMORY] contains nodes which have zones of
* 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
zone_last = ZONE_MOVABLE;
/*
* if the memory to be online is in a zone of 0...zone_last, and
* the zones of 0...zone_last don't have memory before online, we will
* need to set the node to node_states[N_NORMAL_MEMORY] after
* the memory is online.
*/
if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
arg->status_change_nid_normal = nid;
else
arg->status_change_nid_normal = -1;
#ifdef CONFIG_HIGHMEM
/*
* If we have movable node, node_states[N_HIGH_MEMORY]
* contains nodes which have zones of 0...ZONE_HIGHMEM,
* set zone_last to ZONE_HIGHMEM.
*
* If we don't have movable node, node_states[N_NORMAL_MEMORY]
* contains nodes which have zones of 0...ZONE_MOVABLE,
* set zone_last to ZONE_MOVABLE.
*/
zone_last = ZONE_HIGHMEM;
if (N_MEMORY == N_HIGH_MEMORY)
zone_last = ZONE_MOVABLE;
if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
arg->status_change_nid_high = nid;
else
arg->status_change_nid_high = -1;
#else
arg->status_change_nid_high = arg->status_change_nid_normal;
#endif
/*
* if the node don't have memory befor online, we will need to
* set the node to node_states[N_MEMORY] after the memory
* is online.
*/
arg->status_change_nid = nid;
else
arg->status_change_nid = -1;
}
static void node_states_set_node(int node, struct memory_notify *arg)
{
if (arg->status_change_nid_normal >= 0)
node_set_state(node, N_NORMAL_MEMORY);
if (arg->status_change_nid_high >= 0)
node_set_state(node, N_HIGH_MEMORY);
node_set_state(node, N_MEMORY);
}
static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long old_end_pfn = zone_end_pfn(zone);
if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
zone->zone_start_pfn = start_pfn;
zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
}
static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
unsigned long nr_pages)
{
unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
pgdat->node_start_pfn = start_pfn;
pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
}
void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
unsigned long flags;
if (zone_is_empty(zone))
init_currently_empty_zone(zone, start_pfn, nr_pages);
clear_zone_contiguous(zone);
/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
pgdat_resize_lock(pgdat, &flags);
zone_span_writelock(zone);
resize_zone_range(zone, start_pfn, nr_pages);
zone_span_writeunlock(zone);
resize_pgdat_range(pgdat, start_pfn, nr_pages);
pgdat_resize_unlock(pgdat, &flags);
/*
* TODO now we have a visible range of pages which are not associated
* with their zone properly. Not nice but set_pfnblock_flags_mask
* expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe
*/
memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
MEMMAP_HOTPLUG, altmap);
set_zone_contiguous(zone);
}
/*
* Returns a default kernel memory zone for the given pfn range.
* If no kernel zone covers this pfn range it will automatically go
* to the ZONE_NORMAL.
*/
static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
unsigned long nr_pages)
{
struct pglist_data *pgdat = NODE_DATA(nid);
int zid;
for (zid = 0; zid <= ZONE_NORMAL; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
if (zone_intersects(zone, start_pfn, nr_pages))
return zone;
}
return &pgdat->node_zones[ZONE_NORMAL];
}
static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
unsigned long nr_pages)
struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
nr_pages);
struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
/*
* We inherit the existing zone in a simple case where zones do not
* overlap in the given range
if (in_kernel ^ in_movable)
return (in_kernel) ? kernel_zone : movable_zone;
/*
* If the range doesn't belong to any zone or two zones overlap in the
* given range then we use movable zone only if movable_node is
* enabled because we always online to a kernel zone by default.
*/
return movable_node_enabled ? movable_zone : kernel_zone;
struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
unsigned long nr_pages)
{
if (online_type == MMOP_ONLINE_KERNEL)
return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
if (online_type == MMOP_ONLINE_MOVABLE)
return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
return default_zone_for_pfn(nid, start_pfn, nr_pages);
}
/*
* Associates the given pfn range with the given node and the zone appropriate
* for the given online type.
*/
static struct zone * __meminit move_pfn_range(int online_type, int nid,
unsigned long start_pfn, unsigned long nr_pages)
{
struct zone *zone;
zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
return zone;
/* Must be protected by mem_hotplug_begin() or a device_lock */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
unsigned long flags;
unsigned long onlined_pages = 0;
struct zone *zone;
Yasunori Goto
committed
int need_zonelists_rebuild = 0;
int nid;
int ret;
struct memory_notify arg;
struct memory_block *mem;
/*
* We can't use pfn_to_nid() because nid might be stored in struct page
* which is not yet initialized. Instead, we find nid from memory block.
*/
mem = find_memory_block(__pfn_to_section(pfn));
nid = mem->nid;
/* associate pfn range with the zone */
zone = move_pfn_range(online_type, nid, pfn, nr_pages);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
node_states_check_changes_online(nr_pages, zone, &arg);
ret = memory_notify(MEM_GOING_ONLINE, &arg);
ret = notifier_to_errno(ret);
if (ret)
goto failed_addition;
Yasunori Goto
committed
/*
* If this zone is not populated, then it is not in zonelist.
* This means the page allocator ignores this zone.
* So, zonelist must be updated after online.
*/
if (!populated_zone(zone)) {
Yasunori Goto
committed
need_zonelists_rebuild = 1;
setup_zone_pageset(zone);
Yasunori Goto
committed
ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
if (ret) {
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
goto failed_addition;
}
zone->present_pages += onlined_pages;
pgdat_resize_lock(zone->zone_pgdat, &flags);
zone->zone_pgdat->node_present_pages += onlined_pages;
pgdat_resize_unlock(zone->zone_pgdat, &flags);
if (onlined_pages) {
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
else
zone_pcp_update(zone);
}
init_per_zone_wmark_min();
vm_total_pages = nr_free_pagecache_pages();
Kent Liu
committed
Chandra Seetharaman
committed
writeback_set_ratelimit();
if (onlined_pages)
memory_notify(MEM_ONLINE, &arg);
failed_addition:
pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
return ret;
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
static void reset_node_present_pages(pg_data_t *pgdat)
{
struct zone *z;
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
z->present_pages = 0;
pgdat->node_present_pages = 0;
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
struct pglist_data *pgdat;
unsigned long zones_size[MAX_NR_ZONES] = {0};
unsigned long zholes_size[MAX_NR_ZONES] = {0};
unsigned long start_pfn = PFN_DOWN(start);
pgdat = NODE_DATA(nid);
if (!pgdat) {
pgdat = arch_alloc_nodedata(nid);
if (!pgdat)
return NULL;
arch_refresh_nodedata(nid, pgdat);
Mel Gorman
committed
/*
* Reset the nr_zones, order and classzone_idx before reuse.
* Note that kswapd will init kswapd_classzone_idx properly
* when it starts in the near future.
*/