X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Foom_kill.c;h=3c9040a96a90fb2d362f0ad529111a37d1b3e7a6;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=2f15290492b9d6c49a31fd6cb75d91f0474ee8f9;hpb=a8e794ca871505c8ea96cc102f4ad555c5231d7f;p=linux-2.6.git

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 2f1529049..3c9040a96 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -26,6 +26,7 @@
 /**
  * oom_badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
+ * @p: current uptime in seconds
  *
  * The formula used is relatively simple and documented inline in the
  * function. The main rationale is that we want to select a good task
@@ -41,28 +42,45 @@
  * of least surprise ... (be careful when you change it)
  */
 
-static int badness(struct task_struct *p)
+unsigned long badness(struct task_struct *p, unsigned long uptime)
 {
-	int points, cpu_time, run_time, s;
+	unsigned long points, cpu_time, run_time, s;
+	struct list_head *tsk;
 
 	if (!p->mm)
 		return 0;
-	if (p->flags & PF_MEMDIE)
-		return 0;
 
 	/*
 	 * The memory size of the process is the basis for the badness.
 	 */
 	points = p->mm->total_vm;
 
-	/* add vserver badness ;) */
+	/* FIXME add vserver badness ;) */
+
+	/*
+	 * Processes which fork a lot of child processes are likely
+	 * a good choice. We add the vmsize of the childs if they
+	 * have an own mm. This prevents forking servers to flood the
+	 * machine with an endless amount of childs
+	 */
+	list_for_each(tsk, &p->children) {
+		struct task_struct *chld;
+		chld = list_entry(tsk, struct task_struct, sibling);
+		if (chld->mm != p->mm && chld->mm)
+			points += chld->mm->total_vm;
+	}
 
 	/*
-	 * CPU time is in seconds and run time is in minutes. There is no
-	 * particular reason for this other than that it turned out to work
-	 * very well in practice.
+	 * CPU time is in tens of seconds and run time is in thousands
+	 * of seconds. There is no particular reason for this other than
+	 * that it turned out to work very well in practice.
 	 */
-	cpu_time = (p->utime + p->stime) >> (SHIFT_HZ + 3);
-	run_time = (get_jiffies_64() - p->start_time) >> (SHIFT_HZ + 10);
+	cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime))
+		>> (SHIFT_HZ + 3);
+
+	if (uptime >= p->start_time.tv_sec)
+		run_time = (uptime - p->start_time.tv_sec) >> 10;
+	else
+		run_time = 0;
 
 	s = int_sqrt(cpu_time);
 	if (s)
@@ -94,6 +112,17 @@ static int badness(struct task_struct *p)
 	 */
 	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
 		points /= 4;
+
+	/*
+	 * Adjust the score by oomkilladj.
+	 */
+	if (p->oomkilladj) {
+		if (p->oomkilladj > 0)
+			points <<= p->oomkilladj;
+		else
+			points >>= -(p->oomkilladj);
+	}
+
 #ifdef DEBUG
 	printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n",
 	p->pid, p->comm, points);
@@ -109,19 +138,32 @@ static int badness(struct task_struct *p)
  */
 static struct task_struct * select_bad_process(void)
 {
-	int maxpoints = 0;
+	unsigned long maxpoints = 0;
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
+	struct timespec uptime;
 
+	do_posix_clock_monotonic_gettime(&uptime);
 	do_each_thread(g, p)
-		if (p->pid) {
-			int points = badness(p);
-			if (points > maxpoints) {
+		/* skip the init task with pid == 1 */
+		if (p->pid > 1) {
+			unsigned long points;
+
+			/*
+			 * This is in the process of releasing memory so wait it
+			 * to finish before killing some other task by mistake.
+			 */
+			if ((unlikely(test_tsk_thread_flag(p, TIF_MEMDIE)) || (p->flags & PF_EXITING)) &&
+			    !(p->flags & PF_DEAD))
+				return ERR_PTR(-1UL);
+			if (p->flags & PF_SWAPOFF)
+				return p;
+
+			points = badness(p, uptime.tv_sec);
+			if (points > maxpoints || !chosen) {
 				chosen = p;
 				maxpoints = points;
 			}
-			if (p->flags & PF_SWAPOFF)
-				return p;
 		}
 	while_each_thread(g, p);
 	return chosen;
@@ -134,6 +176,12 @@ static struct task_struct * select_bad_process(void)
  */
 static void __oom_kill_task(task_t *p)
 {
+	if (p->pid == 1) {
+		WARN_ON(1);
+		printk(KERN_WARNING "tried to kill init!\n");
+		return;
+	}
+
 	task_lock(p);
 	if (!p->mm || p->mm == &init_mm) {
 		WARN_ON(1);
@@ -150,25 +198,53 @@ static void __oom_kill_task(task_t *p)
 	 * exit() and clear out its resources quickly...
 	 */
 	p->time_slice = HZ;
-	p->flags |= PF_MEMALLOC | PF_MEMDIE;
+	set_tsk_thread_flag(p, TIF_MEMDIE);
 
-	/* This process has hardware access, be more careful. */
-	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
-		force_sig(SIGTERM, p);
-	} else {
-		force_sig(SIGKILL, p);
-	}
+	force_sig(SIGKILL, p);
 }
 
 static struct mm_struct *oom_kill_task(task_t *p)
 {
 	struct mm_struct *mm = get_task_mm(p);
-	if (!mm || mm == &init_mm)
+	task_t * g, * q;
+
+	if (!mm)
+		return NULL;
+	if (mm == &init_mm) {
+		mmput(mm);
 		return NULL;
+	}
+
 	__oom_kill_task(p);
+	/*
+	 * kill all processes that share the ->mm (i.e. all threads),
+	 * but are in a different thread group
+	 */
+	do_each_thread(g, q)
+		if (q->mm == mm && q->tgid != p->tgid)
+			__oom_kill_task(q);
+	while_each_thread(g, q);
+
 	return mm;
 }
 
+static struct mm_struct *oom_kill_process(struct task_struct *p)
+{
+	struct mm_struct *mm;
+	struct task_struct *c;
+	struct list_head *tsk;
+
+	/* Try to kill a child first */
+	list_for_each(tsk, &p->children) {
+		c = list_entry(tsk, struct task_struct, sibling);
+		if (c->mm == p->mm)
+			continue;
+		mm = oom_kill_task(c);
+		if (mm)
+			return mm;
+	}
+	return oom_kill_task(p);
+}
 
 /**
  * oom_kill - kill the "best" process when we run out of memory
@@ -178,121 +254,40 @@ static struct mm_struct *oom_kill_task(task_t *p)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-static void oom_kill(void)
+void out_of_memory(int gfp_mask)
 {
-	struct mm_struct *mm;
-	struct task_struct *g, *p, *q;
-
+	struct mm_struct *mm = NULL;
+	task_t * p;
+
 	read_lock(&tasklist_lock);
 retry:
 	p = select_bad_process();
+	if (PTR_ERR(p) == -1UL)
+		goto out;
+
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
+		read_unlock(&tasklist_lock);
 		show_free_areas();
 		panic("Out of memory and no killable processes...\n");
 	}
 
-	mm = oom_kill_task(p);
+	printk("oom-killer: gfp_mask=0x%x\n", gfp_mask);
+	show_free_areas();
+	mm = oom_kill_process(p);
 	if (!mm)
 		goto retry;
 
-	/*
-	 * kill all processes that share the ->mm (i.e. all threads),
-	 * but are in a different thread group
-	 */
-	do_each_thread(g, q)
-		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
-	while_each_thread(g, q);
-	if (!p->mm)
-		printk(KERN_INFO "Fixed up OOM kill of mm-less task\n");
-	read_unlock(&tasklist_lock);
-	mmput(mm);
-
-	/*
-	 * Make kswapd go out of the way, so "p" has a good chance of
-	 * killing itself before someone else gets the chance to ask
-	 * for more memory.
-	 */
-	yield();
-	return;
-}
-
-/**
- * out_of_memory - is the system out of memory?
- */
-void out_of_memory(void)
-{
-	/*
-	 * oom_lock protects out_of_memory()'s static variables.
-	 * It's a global lock; this is not performance-critical.
-	 */
-	static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED;
-	static unsigned long first, last, count, lastkill;
-	unsigned long now, since;
-
-	/*
-	 * Enough swap space left? Not OOM.
-	 */
-	if (nr_swap_pages > 0)
-		return;
-
-	spin_lock(&oom_lock);
-	now = jiffies;
-	since = now - last;
-	last = now;
-
-	/*
-	 * If it's been a long time since last failure,
-	 * we're not oom.
-	 */
-	last = now;
-	if (since > 5*HZ)
-		goto reset;
-
-	/*
-	 * If we haven't tried for at least one second,
-	 * we're not really oom.
-	 */
-	since = now - first;
-	if (since < HZ)
-		goto out_unlock;
-
-	/*
-	 * If we have gotten only a few failures,
-	 * we're not really oom.
-	 */
-	if (++count < 10)
-		goto out_unlock;
-
-	/*
-	 * If we just killed a process, wait a while
-	 * to give that task a chance to exit. This
-	 * avoids killing multiple processes needlessly.
-	 */
-	since = now - lastkill;
-	if (since < HZ*5)
-		goto out_unlock;
-
-	/*
-	 * Ok, really out of memory. Kill something.
-	 */
-	lastkill = now;
-
-	/* oom_kill() sleeps */
-	spin_unlock(&oom_lock);
-	oom_kill();
-	spin_lock(&oom_lock);
+ out:
+	read_unlock(&tasklist_lock);
+	if (mm)
+		mmput(mm);
 
-reset:
 	/*
-	 * We dropped the lock above, so check to be sure the variable
-	 * first only ever increases to prevent false OOM's.
+	 * Give "p" a good chance of killing itself before we
+	 * retry to allocate memory.
 	 */
-	if (time_after(now, first))
-		first = now;
-	count = 0;
-
-out_unlock:
-	spin_unlock(&oom_lock);
+	__set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(1);
 }