Commit 6d1fdc48, authored by Johannes Weiner, committed by Linus Torvalds
Browse files

memcg: sanitize __mem_cgroup_try_charge() call protocol



Some callsites pass a memcg directly, some callsites pass an mm that
then has to be translated to a memcg.  This makes for a terrible
function interface.

Just push the mm-to-memcg translation into the respective callsites and
always pass a memcg to mem_cgroup_try_charge().

[mhocko@suse.cz: add charge mm helper]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b6b6cc72
...@@ -2575,7 +2575,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, ...@@ -2575,7 +2575,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
} }
/* See __mem_cgroup_try_charge() for details */ /* See mem_cgroup_try_charge() for details */
enum { enum {
CHARGE_OK, /* success */ CHARGE_OK, /* success */
CHARGE_RETRY, /* need to retry but retry is not bad */ CHARGE_RETRY, /* need to retry but retry is not bad */
...@@ -2648,45 +2648,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, ...@@ -2648,45 +2648,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
return CHARGE_NOMEM; return CHARGE_NOMEM;
} }
/* /**
* __mem_cgroup_try_charge() does * mem_cgroup_try_charge - try charging a memcg
* 1. detect memcg to be charged against from passed *mm and *ptr, * @memcg: memcg to charge
* 2. update res_counter * @nr_pages: number of pages to charge
* 3. call memory reclaim if necessary. * @oom: trigger OOM if reclaim fails
*
* In some special case, if the task is fatal, fatal_signal_pending() or
* has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
* to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
* as possible without any hazards. 2: all pages should have a valid
* pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
* pointer, that is treated as a charge to root_mem_cgroup.
*
* So __mem_cgroup_try_charge() will return
* 0 ... on success, filling *ptr with a valid memcg pointer.
* -ENOMEM ... charge failure because of resource limits.
* -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup.
* *
* Unlike the exported interface, an "oom" parameter is added. if oom==true, * Returns 0 if @memcg was charged successfully, -EINTR if the charge
* the oom-killer can be invoked. * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
*/ */
static int __mem_cgroup_try_charge(struct mm_struct *mm, static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
gfp_t gfp_mask, gfp_t gfp_mask,
unsigned int nr_pages, unsigned int nr_pages,
struct mem_cgroup **ptr, bool oom)
bool oom)
{ {
unsigned int batch = max(CHARGE_BATCH, nr_pages); unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *memcg = NULL;
int ret; int ret;
if (mem_cgroup_is_root(memcg))
goto done;
/* /*
* Unlike gloval-vm's OOM-kill, we're not in memory shortage * Unlike in global OOM situations, memcg is not in a physical
* in system level. So, allow to go ahead dying process in addition to * memory shortage. Allow dying and OOM-killed tasks to
* MEMDIE process. * bypass the last charges so that they can exit quickly and
* free their memory.
*/ */
if (unlikely(test_thread_flag(TIF_MEMDIE) if (unlikely(test_thread_flag(TIF_MEMDIE) ||
|| fatal_signal_pending(current))) fatal_signal_pending(current)))
goto bypass; goto bypass;
if (unlikely(task_in_memcg_oom(current))) if (unlikely(task_in_memcg_oom(current)))
...@@ -2695,14 +2684,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, ...@@ -2695,14 +2684,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
if (gfp_mask & __GFP_NOFAIL) if (gfp_mask & __GFP_NOFAIL)
oom = false; oom = false;
again: again:
if (*ptr) { /* css should be a valid one */
memcg = *ptr;
css_get(&memcg->css);
} else {
memcg = get_mem_cgroup_from_mm(mm);
}
if (mem_cgroup_is_root(memcg))
goto done;
if (consume_stock(memcg, nr_pages)) if (consume_stock(memcg, nr_pages))
goto done; goto done;
...@@ -2710,10 +2691,8 @@ again: ...@@ -2710,10 +2691,8 @@ again:
bool invoke_oom = oom && !nr_oom_retries; bool invoke_oom = oom && !nr_oom_retries;
/* If killed, bypass charge */ /* If killed, bypass charge */
if (fatal_signal_pending(current)) { if (fatal_signal_pending(current))
css_put(&memcg->css);
goto bypass; goto bypass;
}
ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
nr_pages, invoke_oom); nr_pages, invoke_oom);
...@@ -2722,17 +2701,12 @@ again: ...@@ -2722,17 +2701,12 @@ again:
break; break;
case CHARGE_RETRY: /* not in OOM situation but retry */ case CHARGE_RETRY: /* not in OOM situation but retry */
batch = nr_pages; batch = nr_pages;
css_put(&memcg->css);
memcg = NULL;
goto again; goto again;
case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
css_put(&memcg->css);
goto nomem; goto nomem;
case CHARGE_NOMEM: /* OOM routine works */ case CHARGE_NOMEM: /* OOM routine works */
if (!oom || invoke_oom) { if (!oom || invoke_oom)
css_put(&memcg->css);
goto nomem; goto nomem;
}
nr_oom_retries--; nr_oom_retries--;
break; break;
} }
...@@ -2741,19 +2715,43 @@ again: ...@@ -2741,19 +2715,43 @@ again:
if (batch > nr_pages) if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages); refill_stock(memcg, batch - nr_pages);
done: done:
css_put(&memcg->css);
*ptr = memcg;
return 0; return 0;
nomem: nomem:
if (!(gfp_mask & __GFP_NOFAIL)) { if (!(gfp_mask & __GFP_NOFAIL))
*ptr = NULL;
return -ENOMEM; return -ENOMEM;
}
bypass: bypass:
*ptr = root_mem_cgroup;
return -EINTR; return -EINTR;
} }
/**
 * mem_cgroup_try_charge_mm - charge pages to the memcg that owns an mm
 * @mm: mm_struct whose memcg is looked up and charged
 * @gfp_mask: allocation flags forwarded to mem_cgroup_try_charge()
 * @nr_pages: number of pages to charge
 * @oom: trigger OOM if reclaim fails
 *
 * Returns the memcg looked up from @mm if the charge succeeded,
 * root_mem_cgroup if mem_cgroup_try_charge() reported -EINTR (the
 * charge was bypassed), and NULL if the charge failed for any other
 * reason.
 */
static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
						   gfp_t gfp_mask,
						   unsigned int nr_pages,
						   bool oom)
{
	struct mem_cgroup *target = get_mem_cgroup_from_mm(mm);
	int err = mem_cgroup_try_charge(target, gfp_mask, nr_pages, oom);

	/*
	 * The css reference is dropped once the charge attempt is over,
	 * whatever its outcome (presumably the one taken by
	 * get_mem_cgroup_from_mm() - confirm against its definition).
	 */
	css_put(&target->css);

	switch (err) {
	case 0:
		return target;
	case -EINTR:
		/* Bypassed charge: report the root memcg to the caller. */
		return root_mem_cgroup;
	default:
		return NULL;
	}
}
/* /*
* Sometimes we have to undo a charge we got by try_charge(). * Sometimes we have to undo a charge we got by try_charge().
* This function is for that and do uncharge, put css's refcnt. * This function is for that and do uncharge, put css's refcnt.
...@@ -2949,20 +2947,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v) ...@@ -2949,20 +2947,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
{ {
struct res_counter *fail_res; struct res_counter *fail_res;
struct mem_cgroup *_memcg;
int ret = 0; int ret = 0;
ret = res_counter_charge(&memcg->kmem, size, &fail_res); ret = res_counter_charge(&memcg->kmem, size, &fail_res);
if (ret) if (ret)
return ret; return ret;
_memcg = memcg; ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT, oom_gfp_allowed(gfp));
&_memcg, oom_gfp_allowed(gfp));
if (ret == -EINTR) { if (ret == -EINTR) {
/* /*
* __mem_cgroup_try_charge() chose to bypass to root due to * mem_cgroup_try_charge() chose to bypass to root due to
* OOM kill or fatal signal. Since our only options are to * OOM kill or fatal signal. Since our only options are to
* either fail the allocation or charge it to this cgroup, do * either fail the allocation or charge it to this cgroup, do
* it as a temporary condition. But we can't fail. From a * it as a temporary condition. But we can't fail. From a
...@@ -2972,7 +2967,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) ...@@ -2972,7 +2967,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
* *
* This condition will only trigger if the task entered * This condition will only trigger if the task entered
* memcg_charge_kmem in a sane state, but was OOM-killed during * memcg_charge_kmem in a sane state, but was OOM-killed during
* __mem_cgroup_try_charge() above. Tasks that were already * mem_cgroup_try_charge() above. Tasks that were already
* dying when the allocation triggers should have been already * dying when the allocation triggers should have been already
* directed to the root cgroup in memcontrol.h * directed to the root cgroup in memcontrol.h
*/ */
...@@ -3826,10 +3821,9 @@ out: ...@@ -3826,10 +3821,9 @@ out:
int mem_cgroup_newpage_charge(struct page *page, int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask) struct mm_struct *mm, gfp_t gfp_mask)
{ {
struct mem_cgroup *memcg = NULL;
unsigned int nr_pages = 1; unsigned int nr_pages = 1;
struct mem_cgroup *memcg;
bool oom = true; bool oom = true;
int ret;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return 0; return 0;
...@@ -3848,9 +3842,9 @@ int mem_cgroup_newpage_charge(struct page *page, ...@@ -3848,9 +3842,9 @@ int mem_cgroup_newpage_charge(struct page *page,
oom = false; oom = false;
} }
ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
if (ret == -ENOMEM) if (!memcg)
return ret; return -ENOMEM;
__mem_cgroup_commit_charge(memcg, page, nr_pages, __mem_cgroup_commit_charge(memcg, page, nr_pages,
MEM_CGROUP_CHARGE_TYPE_ANON, false); MEM_CGROUP_CHARGE_TYPE_ANON, false);
return 0; return 0;
...@@ -3867,7 +3861,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, ...@@ -3867,7 +3861,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
gfp_t mask, gfp_t mask,
struct mem_cgroup **memcgp) struct mem_cgroup **memcgp)
{ {
struct mem_cgroup *memcg; struct mem_cgroup *memcg = NULL;
struct page_cgroup *pc; struct page_cgroup *pc;
int ret; int ret;
...@@ -3880,31 +3874,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, ...@@ -3880,31 +3874,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
* in turn serializes uncharging. * in turn serializes uncharging.
*/ */
if (PageCgroupUsed(pc)) if (PageCgroupUsed(pc))
return 0; goto out;
if (!do_swap_account) if (do_swap_account)
goto charge_cur_mm; memcg = try_get_mem_cgroup_from_page(page);
memcg = try_get_mem_cgroup_from_page(page);
if (!memcg) if (!memcg)
goto charge_cur_mm; memcg = get_mem_cgroup_from_mm(mm);
*memcgp = memcg; ret = mem_cgroup_try_charge(memcg, mask, 1, true);
ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
css_put(&memcg->css); css_put(&memcg->css);
if (ret == -EINTR) if (ret == -EINTR)
ret = 0; memcg = root_mem_cgroup;
return ret; else if (ret)
charge_cur_mm: return ret;
ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true); out:
if (ret == -EINTR) *memcgp = memcg;
ret = 0; return 0;
return ret;
} }
int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
gfp_t gfp_mask, struct mem_cgroup **memcgp) gfp_t gfp_mask, struct mem_cgroup **memcgp)
{ {
*memcgp = NULL; if (mem_cgroup_disabled()) {
if (mem_cgroup_disabled()) *memcgp = NULL;
return 0; return 0;
}
/* /*
* A racing thread's fault, or swapoff, may have already * A racing thread's fault, or swapoff, may have already
* updated the pte, and even removed page from swap cache: in * updated the pte, and even removed page from swap cache: in
...@@ -3912,12 +3904,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, ...@@ -3912,12 +3904,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
* there's also a KSM case which does need to charge the page. * there's also a KSM case which does need to charge the page.
*/ */
if (!PageSwapCache(page)) { if (!PageSwapCache(page)) {
int ret; struct mem_cgroup *memcg;
ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true); memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
if (ret == -EINTR) if (!memcg)
ret = 0; return -ENOMEM;
return ret; *memcgp = memcg;
return 0;
} }
return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
} }
...@@ -3964,8 +3957,8 @@ void mem_cgroup_commit_charge_swapin(struct page *page, ...@@ -3964,8 +3957,8 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask) gfp_t gfp_mask)
{ {
struct mem_cgroup *memcg = NULL;
enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
struct mem_cgroup *memcg;
int ret; int ret;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
...@@ -3973,23 +3966,28 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, ...@@ -3973,23 +3966,28 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
if (PageCompound(page)) if (PageCompound(page))
return 0; return 0;
if (!PageSwapCache(page)) { if (PageSwapCache(page)) { /* shmem */
/*
* Page cache insertions can happen without an actual
* task context, e.g. during disk probing on boot.
*/
if (!mm)
memcg = root_mem_cgroup;
ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
if (ret != -ENOMEM)
__mem_cgroup_commit_charge(memcg, page, 1, type, false);
} else { /* page is swapcache/shmem */
ret = __mem_cgroup_try_charge_swapin(mm, page, ret = __mem_cgroup_try_charge_swapin(mm, page,
gfp_mask, &memcg); gfp_mask, &memcg);
if (!ret) if (ret)
__mem_cgroup_commit_charge_swapin(page, memcg, type); return ret;
__mem_cgroup_commit_charge_swapin(page, memcg, type);
return 0;
} }
return ret;
/*
* Page cache insertions can happen without an actual mm
* context, e.g. during disk probing on boot.
*/
if (unlikely(!mm))
memcg = root_mem_cgroup;
else {
memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
if (!memcg)
return -ENOMEM;
}
__mem_cgroup_commit_charge(memcg, page, 1, type, false);
return 0;
} }
static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
...@@ -6601,8 +6599,7 @@ one_by_one: ...@@ -6601,8 +6599,7 @@ one_by_one:
batch_count = PRECHARGE_COUNT_AT_ONCE; batch_count = PRECHARGE_COUNT_AT_ONCE;
cond_resched(); cond_resched();
} }
ret = __mem_cgroup_try_charge(NULL, ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
GFP_KERNEL, 1, &memcg, false);
if (ret) if (ret)
/* mem_cgroup_clear_mc() will do uncharge later */ /* mem_cgroup_clear_mc() will do uncharge later */
return ret; return ret;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment