oss-sec mailing list archives
[PATCH 1/2] oom: don't ignore rss in nascent mm
From: KOSAKI Motohiro <kosaki.motohiro () jp fujitsu com>
Date: Thu, 9 Sep 2010 14:03:55 +0900 (JST)
This patch was made on top of the "oom: remove totalpage normalization from oom_badness()" patch.
===============================
execve() creates a new mm struct, sets up the stack, and pushes the argv vector.
Unfortunately, this nascent mm is not yet pointed to by any task, so the
OOM killer cannot detect its memory usage. Therefore, the OOM killer
may kill the wrong task.
Thus, this patch adds a task->in_exec_mm member and uses it to track
the nascent mm's memory usage.
Cc: pageexec () freemail hu
Cc: Roland McGrath <roland () redhat com>
Cc: Solar Designer <solar () openwall com>
Cc: Brad Spengler <spender () grsecurity net>
Cc: Eugene Teo <eteo () redhat com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro () jp fujitsu com>
---
fs/compat.c | 4 +++-
fs/exec.c | 14 +++++++++++++-
include/linux/binfmts.h | 1 +
include/linux/sched.h | 1 +
mm/oom_kill.c | 37 +++++++++++++++++++++++++++++--------
5 files changed, 47 insertions(+), 10 deletions(-)
diff --git a/fs/compat.c b/fs/compat.c
index 718c706..b631120 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1567,8 +1567,10 @@ int compat_do_execve(char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ set_exec_mm(NULL);
mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 2d94552..b41834c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -347,6 +347,8 @@ int bprm_mm_init(struct linux_binprm *bprm)
if (err)
goto err;
+ set_exec_mm(bprm->mm);
+
return 0;
err:
@@ -983,6 +985,7 @@ int flush_old_exec(struct linux_binprm * bprm)
goto out;
bprm->mm = NULL; /* We're using it now */
+ set_exec_mm(NULL);
current->flags &= ~PF_RANDOMIZE;
flush_thread();
@@ -1314,6 +1317,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
EXPORT_SYMBOL(search_binary_handler);
+void set_exec_mm(struct mm_struct *mm)
+{
+ task_lock(current);
+ current->in_exec_mm = mm;
+ task_unlock(current);
+}
+
/*
* sys_execve() executes a new program.
*/
@@ -1402,8 +1412,10 @@ int do_execve(const char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ set_exec_mm(NULL);
mmput (bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a065612..2fde1ba 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -133,6 +133,7 @@ extern void install_exec_creds(struct linux_binprm *bprm);
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);
+extern void set_exec_mm(struct mm_struct *mm);
#endif /* __KERNEL__ */
#endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e61d60..bb5bf3d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1226,6 +1226,7 @@ struct task_struct {
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
+ struct mm_struct *in_exec_mm;
unsigned did_exec:1;
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
* execve */
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c1beda0..7d38435 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -120,6 +120,33 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
return NULL;
}
+/*
+ * The baseline for the badness score is the proportion of RAM that each
+ * task's rss and swap space use.
+ */
+static unsigned long oom_rss_swap_usage(struct task_struct *p)
+{
+ struct task_struct *t = p;
+ int mm_accounted = 0;
+ unsigned long points = 0;
+
+ do {
+ task_lock(t);
+ if (!mm_accounted && t->mm) {
+ points += get_mm_rss(t->mm);
+ points += get_mm_counter(t->mm, MM_SWAPENTS);
+ mm_accounted = 1;
+ }
+ if (t->in_exec_mm) {
+ points += get_mm_rss(t->in_exec_mm);
+ points += get_mm_counter(t->in_exec_mm, MM_SWAPENTS);
+ }
+ task_unlock(t);
+ } while_each_thread(p, t);
+
+ return points;
+}
+
/* return true if the task is not adequate as candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem,
const nodemask_t *nodemask)
@@ -169,16 +196,10 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *mem,
if (p->flags & PF_OOM_ORIGIN)
return ULONG_MAX;
- p = find_lock_task_mm(p);
- if (!p)
+ points = oom_rss_swap_usage(p);
+ if (!points)
return 0;
- /*
- * The baseline for the badness score is the proportion of RAM that each
- * task's rss and swap space use.
- */
- points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS));
- task_unlock(p);
/*
* Root processes get 3% bonus, just like the __vm_enough_memory()
--
1.6.5.2
Current thread:
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size, (continued)
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size pageexec (Sep 15)
- Message not available
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size Roland McGrath (Sep 10)
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size pageexec (Sep 11)
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size Roland McGrath (Sep 14)
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size pageexec (Sep 14)
- Message not available
- Re: [PATCH 1/3] setup_arg_pages: diagnose excessive argument size Roland McGrath (Sep 10)
- [PATCH 2/3] execve: improve interactivity with large arguments Roland McGrath (Sep 07)
- [PATCH 3/3] execve: make responsive to SIGKILL with large arguments Roland McGrath (Sep 07)
- Re: [PATCH 0/3] execve argument-copying fixes KOSAKI Motohiro (Sep 07)
- [PATCH 0/2] execve memory exhaust of argument-copying fixes KOSAKI Motohiro (Sep 09)
- [PATCH 1/2] oom: don't ignore rss in nascent mm KOSAKI Motohiro (Sep 09)
- Message not available
- Re: [PATCH 1/2] oom: don't ignore rss in nascent mm Roland McGrath (Sep 10)
- Message not available
- [PATCH] move cred_guard_mutex from task_struct to signal_struct KOSAKI Motohiro (Sep 10)
- Re: [PATCH] move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov (Sep 10)
- Re: [PATCH] move cred_guard_mutex from task_struct to signal_struct KOSAKI Motohiro (Sep 15)
- [PATCH 2/2] execve: check the VM has enough memory at first KOSAKI Motohiro (Sep 09)
- Re: [PATCH 2/2] execve: check the VM has enough memory at first Linus Torvalds (Sep 10)
- Re: [PATCH 2/2] execve: check the VM has enough memory at first KOSAKI Motohiro (Sep 13)
- Re: [PATCH 2/2] execve: check the VM has enough memory at first KOSAKI Motohiro (Sep 15)
- Re: [PATCH 2/2] execve: check the VM has enough memory at first Linus Torvalds (Sep 16)
- Re: [PATCH] exec argument expansion can inappropriately trigger OOM-killer Solar Designer (Aug 30)
