Newer
Older
goto unlock_out;
} else if (page_mapped(page)) /* Anon */
goto unlock_out;
break;
default:
break;
mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages);
/*
* pc->mem_cgroup is not cleared here. It will be accessed when it's
* freed from LRU. This is safe because uncharged page is expected not
* to be reused (freed soon). Exception is SwapCache, it's handled by
* special functions.
*/
/*
* even after unlock, we have mem->res.usage here and this memcg
* will never be freed.
*/
memcg_check_events(mem, page);
if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
mem_cgroup_swap_statistics(mem, true);
mem_cgroup_get(mem);
}
if (!mem_cgroup_is_root(mem))
mem_cgroup_do_uncharge(mem, nr_pages, ctype);
KAMEZAWA Hiroyuki
committed
unlock_out:
unlock_page_cgroup(pc);
void mem_cgroup_uncharge_page(struct page *page)
{
/* early check. */
if (page_mapped(page))
return;
if (page->mapping && !PageAnon(page))
return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page));
VM_BUG_ON(page->mapping);
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
/*
* Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
* In that cases, pages are freed continuously and we can expect pages
* are in the same memcg. All these calls itself limits the number of
* pages freed at once, then uncharge_start/end() is called properly.
* This may be called prural(2) times in a context,
*/
void mem_cgroup_uncharge_start(void)
{
current->memcg_batch.do_batch++;
/* We can do nest. */
if (current->memcg_batch.do_batch == 1) {
current->memcg_batch.memcg = NULL;
current->memcg_batch.nr_pages = 0;
current->memcg_batch.memsw_nr_pages = 0;
}
}
void mem_cgroup_uncharge_end(void)
{
struct memcg_batch_info *batch = ¤t->memcg_batch;
if (!batch->do_batch)
return;
batch->do_batch--;
if (batch->do_batch) /* If stacked, do nothing. */
return;
if (!batch->memcg)
return;
/*
* This "batch->memcg" is valid without any css_get/put etc...
* bacause we hide charges behind us.
*/
if (batch->nr_pages)
res_counter_uncharge(&batch->memcg->res,
batch->nr_pages * PAGE_SIZE);
if (batch->memsw_nr_pages)
res_counter_uncharge(&batch->memcg->memsw,
batch->memsw_nr_pages * PAGE_SIZE);
memcg_oom_recover(batch->memcg);
/* forget this pointer (for sanity check) */
batch->memcg = NULL;
}
#ifdef CONFIG_SWAP
* called after __delete_from_swap_cache() and drop "page" account.
* memcg information is recorded to swap_cgroup of "ent"
*/
void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
if (!swapout) /* this was a swap cache but the swap is unused ! */
ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
memcg = __mem_cgroup_uncharge_common(page, ctype);
/*
* record memcg information, if swapout && memcg != NULL,
* mem_cgroup_get() was called in uncharge().
*/
if (do_swap_account && swapout && memcg)
swap_cgroup_record(ent, css_id(&memcg->css));
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
* called from swap_entry_free(). remove record in swap_cgroup and
* uncharge "memsw" account.
*/
void mem_cgroup_uncharge_swap(swp_entry_t ent)
unsigned short id;
if (!do_swap_account)
return;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
/*
* We uncharge this because swap is freed.
* This memcg can be obsolete one. We avoid calling css_tryget
*/
if (!mem_cgroup_is_root(memcg))
KAMEZAWA Hiroyuki
committed
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false);
rcu_read_unlock();
/**
* mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
* @entry: swap entry to be moved
* @from: mem_cgroup which the entry is moved from
* @to: mem_cgroup which the entry is moved to
* @need_fixup: whether we should fixup res_counters and refcounts.
*
* It succeeds only when the swap_cgroup's record for this entry is the same
* as the mem_cgroup's id of @from.
*
* Returns 0 on success, -EINVAL on failure.
*
* The caller must have charged to @to, IOW, called res_counter_charge() about
* both res and memsw, and called css_get().
*/
static int mem_cgroup_move_swap_account(swp_entry_t entry,
struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
{
unsigned short old_id, new_id;
old_id = css_id(&from->css);
new_id = css_id(&to->css);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
mem_cgroup_swap_statistics(to, true);
* This function is only called from task migration context now.
* It postpones res_counter and refcount handling till the end
* of task migration(mem_cgroup_clear_mc()) for performance
* improvement. But we cannot postpone mem_cgroup_get(to)
* because if the process that has been moved to @to does
* swap-in, the refcount of @to might be decreased to 0.
if (need_fixup) {
if (!mem_cgroup_is_root(from))
res_counter_uncharge(&from->memsw, PAGE_SIZE);
mem_cgroup_put(from);
/*
* we charged both to->res and to->memsw, so we should
* uncharge to->res.
*/
if (!mem_cgroup_is_root(to))
res_counter_uncharge(&to->res, PAGE_SIZE);
}
return 0;
}
return -EINVAL;
}
#else
static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
{
return -EINVAL;
}
KAMEZAWA Hiroyuki
committed
/*
* Before starting migration, account PAGE_SIZE to mem_cgroup that the old
* page belongs to.
KAMEZAWA Hiroyuki
committed
*/
int mem_cgroup_prepare_migration(struct page *page,
struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
KAMEZAWA Hiroyuki
committed
{
struct page_cgroup *pc;
enum charge_type ctype;
*ptr = NULL;
return 0;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup;
css_get(&mem->css);
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
/*
* At migrating an anonymous page, its mapcount goes down
* to 0 and uncharge() will be called. But, even if it's fully
* unmapped, migration may fail and this page has to be
* charged again. We set MIGRATION flag here and delay uncharge
* until end_migration() is called
*
* Corner Case Thinking
* A)
* When the old page was mapped as Anon and it's unmap-and-freed
* while migration was ongoing.
* If unmap finds the old page, uncharge() of it will be delayed
* until end_migration(). If unmap finds a new page, it's
* uncharged when it make mapcount to be 1->0. If unmap code
* finds swap_migration_entry, the new page will not be mapped
* and end_migration() will find it(mapcount==0).
*
* B)
* When the old page was mapped but migraion fails, the kernel
* remaps it. A charge for it is kept by MIGRATION flag even
* if mapcount goes down to 0. We can do remap successfully
* without charging it again.
*
* C)
* The "old" page is under lock_page() until the end of
* migration, so, the old page itself will not be swapped-out.
* If the new page is swapped out before end_migraton, our
* hook to usual swap-out path will catch the event.
*/
if (PageAnon(page))
SetPageCgroupMigration(pc);
/*
* If the page is not charged at this point,
* we return here.
*/
if (!mem)
return 0;
ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
css_put(&mem->css);/* drop extra refcnt */
if (ret || *ptr == NULL) {
if (PageAnon(page)) {
lock_page_cgroup(pc);
ClearPageCgroupMigration(pc);
unlock_page_cgroup(pc);
/*
* The old page may be fully unmapped while we kept it.
*/
mem_cgroup_uncharge_page(page);
}
return -ENOMEM;
/*
* We charge new page before it's used/mapped. So, even if unlock_page()
* is called before end_migration, we can catch all events on this new
* page. In the case new page is migrated but not remapped, new page's
* mapcount will be finally 0 and we call uncharge in end_migration().
*/
pc = lookup_page_cgroup(newpage);
if (PageAnon(page))
ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
else if (page_is_file_cache(page))
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
else
ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
__mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
KAMEZAWA Hiroyuki
committed
}
/* remove redundant charge if migration failed*/
void mem_cgroup_end_migration(struct mem_cgroup *mem,
struct page *oldpage, struct page *newpage, bool migration_ok)
KAMEZAWA Hiroyuki
committed
{
struct page *used, *unused;
struct page_cgroup *pc;
if (!mem)
return;
/* blocks rmdir() */
cgroup_exclude_rmdir(&mem->css);
if (!migration_ok) {
used = oldpage;
unused = newpage;
used = newpage;
* We disallowed uncharge of pages under migration because mapcount
* of the page goes down to zero, temporarly.
* Clear the flag and check the page should be charged.
pc = lookup_page_cgroup(oldpage);
lock_page_cgroup(pc);
ClearPageCgroupMigration(pc);
unlock_page_cgroup(pc);
__mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
* If a page is a file cache, radix-tree replacement is very atomic
* and we can skip this check. When it was an Anon page, its mapcount
* goes down to 0. But because we added MIGRATION flage, it's not
* uncharged yet. There are several case but page->mapcount check
* and USED bit check in mem_cgroup_uncharge_page() will do enough
* check. (see prepare_charge() also)
if (PageAnon(used))
mem_cgroup_uncharge_page(used);
* At migration, we may charge account against cgroup which has no
* tasks.
* So, rmdir()->pre_destroy() can be called while we do this charge.
* In that case, we need to call pre_destroy() again. check it here.
*/
cgroup_release_and_wakeup_rmdir(&mem->css);
KAMEZAWA Hiroyuki
committed
}
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
if (likely(pc) && PageCgroupUsed(pc))
return pc;
return NULL;
}
bool mem_cgroup_bad_page_check(struct page *page)
{
if (mem_cgroup_disabled())
return false;
return lookup_page_cgroup_used(page) != NULL;
}
void mem_cgroup_print_bad_page(struct page *page)
{
struct page_cgroup *pc;
pc = lookup_page_cgroup_used(page);
if (pc) {
int ret = -1;
char *path;
printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p",
pc, pc->flags, pc->mem_cgroup);
path = kmalloc(PATH_MAX, GFP_KERNEL);
if (path) {
rcu_read_lock();
ret = cgroup_path(pc->mem_cgroup->css.cgroup,
path, PATH_MAX);
rcu_read_unlock();
}
printk(KERN_CONT "(%s)\n",
(ret < 0) ? "cannot get the path" : path);
kfree(path);
}
}
#endif
static DEFINE_MUTEX(set_limit_mutex);
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
int retry_count;
int children = mem_cgroup_count_children(memcg);
u64 curusage, oldusage;
/*
* For keeping hierarchical_reclaim simple, how long we should retry
* is depends on callers. We set our retry-count to be function
* of # of children which we should visit in this loop.
*/
retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
if (signal_pending(current)) {
ret = -EINTR;
break;
}
/*
* Rather than hide all in some function, I do this in
* open coded manner. You see what this really does.
* We have to guarantee mem->res.limit < mem->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
if (memswlimit < val) {
ret = -EINVAL;
mutex_unlock(&set_limit_mutex);
memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
if (memlimit < val)
enlarge = 1;
ret = res_counter_set_limit(&memcg->res, val);
if (!ret) {
if (memswlimit == val)
memcg->memsw_is_minimum = true;
else
memcg->memsw_is_minimum = false;
}
mutex_unlock(&set_limit_mutex);
if (!ret)
break;
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
MEM_CGROUP_RECLAIM_SHRINK,
NULL);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
retry_count--;
else
oldusage = curusage;
if (!ret && enlarge)
memcg_oom_recover(memcg);
static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
unsigned long long val)
int retry_count;
u64 memlimit, memswlimit, oldusage, curusage;
int children = mem_cgroup_count_children(memcg);
int ret = -EBUSY;
/* see mem_cgroup_resize_res_limit */
retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
while (retry_count) {
if (signal_pending(current)) {
ret = -EINTR;
break;
}
/*
* Rather than hide all in some function, I do this in
* open coded manner. You see what this really does.
* We have to guarantee mem->res.limit < mem->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
if (memlimit > val) {
ret = -EINVAL;
mutex_unlock(&set_limit_mutex);
break;
}
memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
if (memswlimit < val)
enlarge = 1;
ret = res_counter_set_limit(&memcg->memsw, val);
if (!ret) {
if (memlimit == val)
memcg->memsw_is_minimum = true;
else
memcg->memsw_is_minimum = false;
}
mutex_unlock(&set_limit_mutex);
if (!ret)
break;
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
MEM_CGROUP_RECLAIM_NOSWAP |
MEM_CGROUP_RECLAIM_SHRINK,
NULL);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
else
oldusage = curusage;
if (!ret && enlarge)
memcg_oom_recover(memcg);
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask,
unsigned long *total_scanned)
{
unsigned long nr_reclaimed = 0;
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
unsigned long reclaimed;
int loop = 0;
struct mem_cgroup_tree_per_zone *mctz;
unsigned long long excess;
unsigned long nr_scanned;
if (order > 0)
return 0;
mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
/*
* This loop can run a while, specially if mem_cgroup's continuously
* keep exceeding their soft limit and putting the system under
* pressure
*/
do {
if (next_mz)
mz = next_mz;
else
mz = mem_cgroup_largest_soft_limit_node(mctz);
if (!mz)
break;
nr_scanned = 0;
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
gfp_mask,
MEM_CGROUP_RECLAIM_SOFT,
&nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
spin_lock(&mctz->lock);
/*
* If we failed to reclaim anything from this memory cgroup
* it is time to move on to the next cgroup
*/
next_mz = NULL;
if (!reclaimed) {
do {
/*
* Loop until we find yet another one.
*
* By the time we get the soft_limit lock
* again, someone might have aded the
* group back on the RB tree. Iterate to
* make sure we get a different mem.
* mem_cgroup_largest_soft_limit_node returns
* NULL if no other cgroup is present on
* the tree
*/
next_mz =
__mem_cgroup_largest_soft_limit_node(mctz);
Michal Hocko
committed
if (next_mz == mz)
css_put(&next_mz->mem->css);
Michal Hocko
committed
else /* next_mz == NULL or other memcg */
break;
} while (1);
}
__mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
excess = res_counter_soft_limit_excess(&mz->mem->res);
/*
* One school of thought says that we should not add
* back the node to the tree if reclaim returns 0.
* But our reclaim could return 0, simply because due
* to priority we are exposing a smaller subset of
* memory to reclaim from. Consider this as a longer
* term TODO.
*/
/* If excess == 0, no tree ops */
__mem_cgroup_insert_exceeded(mz->mem, mz, mctz, excess);
spin_unlock(&mctz->lock);
css_put(&mz->mem->css);
loop++;
/*
* Could not reclaim anything and there are no more
* mem cgroups to try or we seem to be looping without
* reclaiming anything.
*/
if (!nr_reclaimed &&
(next_mz == NULL ||
loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
break;
} while (!nr_reclaimed);
if (next_mz)
css_put(&next_mz->mem->css);
return nr_reclaimed;
}
KAMEZAWA Hiroyuki
committed
/*
* This routine traverse page_cgroup in given list and drop them all.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
KAMEZAWA Hiroyuki
committed
{
struct zone *zone;
struct mem_cgroup_per_zone *mz;
struct page_cgroup *pc, *busy;
KAMEZAWA Hiroyuki
committed
struct list_head *list;
KAMEZAWA Hiroyuki
committed
zone = &NODE_DATA(node)->node_zones[zid];
mz = mem_cgroup_zoneinfo(mem, node, zid);
list = &mz->lists[lru];
KAMEZAWA Hiroyuki
committed
loop = MEM_CGROUP_ZSTAT(mz, lru);
/* give some margin against EBUSY etc...*/
loop += 256;
busy = NULL;
while (loop--) {
struct page *page;
if (list_empty(list)) {
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
pc = list_entry(list->prev, struct page_cgroup, lru);
if (busy == pc) {
list_move(&pc->lru, list);
busy = NULL;
spin_unlock_irqrestore(&zone->lru_lock, flags);
spin_unlock_irqrestore(&zone->lru_lock, flags);
page = lookup_cgroup_page(pc);
ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
if (ret == -ENOMEM)
if (ret == -EBUSY || ret == -EINVAL) {
/* found lock contention or "pc" is obsolete. */
busy = pc;
cond_resched();
} else
busy = NULL;
KAMEZAWA Hiroyuki
committed
}
if (!ret && !list_empty(list))
return -EBUSY;
return ret;
KAMEZAWA Hiroyuki
committed
}
/*
* make mem_cgroup's charge to be 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
KAMEZAWA Hiroyuki
committed
{
int ret;
int node, zid, shrink;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct cgroup *cgrp = mem->css.cgroup;
KAMEZAWA Hiroyuki
committed
css_get(&mem->css);
/* should free all ? */
if (free_all)
goto try_to_free;
if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
goto out;
ret = -EINTR;
if (signal_pending(current))
KAMEZAWA Hiroyuki
committed
goto out;
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
drain_all_stock_sync(mem);
KAMEZAWA Hiroyuki
committed
mem_cgroup_start_move(mem);
for_each_node_state(node, N_HIGH_MEMORY) {
for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
for_each_lru(l) {
ret = mem_cgroup_force_empty_list(mem,
if (ret)
break;
}
KAMEZAWA Hiroyuki
committed
}
if (ret)
break;
}
KAMEZAWA Hiroyuki
committed
mem_cgroup_end_move(mem);
/* it seems parent cgroup doesn't have enough mem */
if (ret == -ENOMEM)
goto try_to_free;
/* "ret" should also be checked to ensure all lists are empty. */
} while (mem->res.usage > 0 || ret);
KAMEZAWA Hiroyuki
committed
out:
css_put(&mem->css);
return ret;
/* returns EBUSY if there is a task or if we come here twice. */
if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
ret = -EBUSY;
goto out;
}
/* we call try-to-free pages for make this cgroup empty */
lru_add_drain_all();
/* try to free all pages in this cgroup */
shrink = 1;
while (nr_retries && mem->res.usage > 0) {
int progress;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
}
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
/* maybe some writeback is necessary */
congestion_wait(BLK_RW_ASYNC, HZ/10);
/* try move_account...there may be some *locked* pages. */
KAMEZAWA Hiroyuki
committed
}
int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
{
return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
}
static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
{
return mem_cgroup_from_cont(cont)->use_hierarchy;
}
static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
u64 val)
{
int retval = 0;
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
struct cgroup *parent = cont->parent;
struct mem_cgroup *parent_mem = NULL;
if (parent)
parent_mem = mem_cgroup_from_cont(parent);
cgroup_lock();
/*
* If parent's use_hierarchy is set, we can't make any modifications
* in the child subtrees. If it is unset, then the change can
* occur, provided the current cgroup has no children.
*
* For the root cgroup, parent_mem is NULL, we allow value to be
* set if there are no children.
*/
if ((!parent_mem || !parent_mem->use_hierarchy) &&
(val == 1 || val == 0)) {
if (list_empty(&cont->children))
mem->use_hierarchy = val;
else
retval = -EBUSY;
} else
retval = -EINVAL;
cgroup_unlock();
return retval;
}
static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
enum mem_cgroup_stat_index idx)
/* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, mem)
val += mem_cgroup_read_stat(iter, idx);
if (val < 0) /* race ? */
val = 0;
return val;
static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap)
{
if (!mem_cgroup_is_root(mem)) {
if (!swap)
return res_counter_read_u64(&mem->res, RES_USAGE);
else
return res_counter_read_u64(&mem->memsw, RES_USAGE);
}
val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE);
val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS);
val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
return val << PAGE_SHIFT;
}
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
int type, name;
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
if (name == RES_USAGE)
val = mem_cgroup_usage(mem, false);
else
val = res_counter_read_u64(&mem->res, name);
if (name == RES_USAGE)
val = mem_cgroup_usage(mem, true);
else
val = res_counter_read_u64(&mem->memsw, name);
break;
default:
BUG();
break;
}
return val;
/*
* The user of this function is...
* RES_LIMIT.
*/
Paul Menage
committed
static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
const char *buffer)
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
unsigned long long val;
int ret;
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
switch (name) {
if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
ret = -EINVAL;
break;
}
/* This function does all necessary parse...reuse it */
ret = res_counter_memparse_write_strategy(buffer, &val);
if (ret)
break;
if (type == _MEM)
ret = mem_cgroup_resize_limit(memcg, val);
else
ret = mem_cgroup_resize_memsw_limit(memcg, val);
case RES_SOFT_LIMIT:
ret = res_counter_memparse_write_strategy(buffer, &val);
if (ret)
break;
/*
* For memsw, soft limits are hard to implement in terms
* of semantics, for now, we support soft limits for
* control without swap
*/
if (type == _MEM)
ret = res_counter_set_soft_limit(&memcg->res, val);
else
ret = -EINVAL;
break;
default:
ret = -EINVAL; /* should be BUG() ? */
break;
}
return ret;
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
unsigned long long *mem_limit, unsigned long long *memsw_limit)
{
struct cgroup *cgroup;
unsigned long long min_limit, min_memsw_limit, tmp;
min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
cgroup = memcg->css.cgroup;
if (!memcg->use_hierarchy)
goto out;
while (cgroup->parent) {
cgroup = cgroup->parent;
memcg = mem_cgroup_from_cont(cgroup);
if (!memcg->use_hierarchy)
break;
tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
min_limit = min(min_limit, tmp);
tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
min_memsw_limit = min(min_memsw_limit, tmp);
}
out:
*mem_limit = min_limit;
*memsw_limit = min_memsw_limit;
return;
}
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
{
struct mem_cgroup *mem;
mem = mem_cgroup_from_cont(cont);
type = MEMFILE_TYPE(event);
name = MEMFILE_ATTR(event);
switch (name) {
if (type == _MEM)
res_counter_reset_max(&mem->res);
else
res_counter_reset_max(&mem->memsw);
if (type == _MEM)
res_counter_reset_failcnt(&mem->res);
else
res_counter_reset_failcnt(&mem->memsw);
static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
struct cftype *cft)
{
return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
}
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
struct cftype *cft, u64 val)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
if (val >= (1 << NR_MOVE_TYPE))
return -EINVAL;