X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fckrm%2Fckrm_mem.c;h=c6c594a96803c618a92aea15abf7fd187e66166c;hb=44c40f29869a02dd430beb7fed0b6ca7d8ef5e54;hp=736b579c7460e73db5b0fd27eb1acb891e351b36;hpb=8d40237c730b8be87c1b80a5d96b9c603fefa829;p=linux-2.6.git diff --git a/kernel/ckrm/ckrm_mem.c b/kernel/ckrm/ckrm_mem.c index 736b579c7..c6c594a96 100644 --- a/kernel/ckrm/ckrm_mem.c +++ b/kernel/ckrm/ckrm_mem.c @@ -5,7 +5,7 @@ * Provides a Memory Resource controller for CKRM * * Latest version, more details at http://ckrm.sf.net - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -13,9 +13,14 @@ * */ +/* Code Description: TBD + * + */ + #include #include #include +#include #include #include #include @@ -24,35 +29,30 @@ #include #include #include -#include + #include #include #include -#include #define MEM_NAME "mem" #define CKRM_MEM_MAX_HIERARCHY 2 // allows only upto 2 levels - 0, 1 & 2 /* all 1-level memory_share_class are chained together */ -LIST_HEAD(ckrm_memclass_list); +static LIST_HEAD(ckrm_memclass_list); LIST_HEAD(ckrm_shrink_list); -spinlock_t ckrm_mem_lock; // protects both lists above +EXPORT_SYMBOL(ckrm_shrink_list); +spinlock_t ckrm_mem_lock = SPIN_LOCK_UNLOCKED; // protects both lists above +EXPORT_SYMBOL(ckrm_mem_lock); unsigned int ckrm_tot_lru_pages; // total # of pages in the system - // currently doesn't handle memory add/remove -struct ckrm_mem_res *ckrm_mem_root_class; -atomic_t ckrm_mem_real_count = ATOMIC_INIT(0); -static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *); -int ckrm_nr_mem_classes = 0; + // currently doesn't handle memory add/remove +EXPORT_SYMBOL(ckrm_tot_lru_pages); -EXPORT_SYMBOL_GPL(ckrm_memclass_list); -EXPORT_SYMBOL_GPL(ckrm_shrink_list); -EXPORT_SYMBOL_GPL(ckrm_mem_lock); -EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages); -EXPORT_SYMBOL_GPL(ckrm_mem_root_class); -EXPORT_SYMBOL_GPL(ckrm_mem_real_count); -EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes); +static ckrm_mem_res_t *ckrm_mem_root_class; +atomic_t ckrm_mem_real_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(ckrm_mem_real_count); +static void ckrm_mem_evaluate_all_pages(void); /* Initialize rescls values * May be called on each rcfs unmount or as part of error recovery @@ -60,15 +60,6 @@ EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes); * Does not traverse hierarchy reinitializing children. 
*/ -void -memclass_release(struct kref *kref) -{ - struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users); - BUG_ON(ckrm_memclass_valid(cls)); - kfree(cls); -} -EXPORT_SYMBOL_GPL(memclass_release); - static void set_ckrm_tot_pages(void) { @@ -84,12 +75,11 @@ set_ckrm_tot_pages(void) } static void -mem_res_initcls_one(struct ckrm_mem_res *res) +mem_res_initcls_one(void *my_res) { - int zindex = 0; - struct zone *zone; + ckrm_mem_res_t *res = my_res; - memset(res, 0, sizeof(struct ckrm_mem_res)); + memset(res, 0, sizeof(ckrm_mem_res_t)); res->shares.my_guarantee = CKRM_SHARE_DONTCARE; res->shares.my_limit = CKRM_SHARE_DONTCARE; @@ -100,115 +90,21 @@ mem_res_initcls_one(struct ckrm_mem_res *res) res->pg_guar = CKRM_SHARE_DONTCARE; res->pg_limit = CKRM_SHARE_DONTCARE; - - INIT_LIST_HEAD(&res->shrink_list); - INIT_LIST_HEAD(&res->mcls_list); - - for_each_zone(zone) { - INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list); - INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list); - INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list); - res->ckrm_zone[zindex].nr_active = 0; - res->ckrm_zone[zindex].nr_inactive = 0; - res->ckrm_zone[zindex].zone = zone; - res->ckrm_zone[zindex].memcls = res; - zindex++; - } - res->pg_unused = 0; - res->nr_dontcare = 1; // for default class - kref_init(&res->nr_users); -} - -static void -set_impl_guar_children(struct ckrm_mem_res *parres) -{ - ckrm_core_class_t *child = NULL; - struct ckrm_mem_res *cres; - int nr_dontcare = 1; // for defaultclass - int guar, impl_guar; - int resid = mem_rcbs.resid; - - ckrm_lock_hier(parres->core); - while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - // treat NULL cres as don't care as that child is just being - // created. - // FIXME: need a better way to handle this case. - if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) { - nr_dontcare++; - } - } - - parres->nr_dontcare = nr_dontcare; - guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ? 
- parres->impl_guar : parres->pg_unused; - impl_guar = guar / parres->nr_dontcare; - - while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) { - cres->impl_guar = impl_guar; - set_impl_guar_children(cres); - } - } - ckrm_unlock_hier(parres->core); - -} - -void -check_memclass(struct ckrm_mem_res *res, char *str) -{ - int i, act = 0, inact = 0; - struct zone *zone; - struct ckrm_zone *ckrm_zone; - struct list_head *pos; - struct page *page; - -#if 0 - printk("Check<%s> %s: total=%d\n", - str, res->core->name, atomic_read(&res->pg_total)); -#endif - for (i = 0; i < MAX_NR_ZONES; i++) { - act = 0; inact = 0; - ckrm_zone = &res->ckrm_zone[i]; - zone = ckrm_zone->zone; - spin_lock_irq(&zone->lru_lock); - pos = ckrm_zone->inactive_list.next; - while (pos != &ckrm_zone->inactive_list) { - page = list_entry(pos, struct page, lru); - pos = pos->next; - inact++; - } - pos = ckrm_zone->active_list.next; - while (pos != &ckrm_zone->active_list) { - page = list_entry(pos, struct page, lru); - pos = pos->next; - act++; - } - spin_unlock_irq(&zone->lru_lock); -#if 0 - printk("Check<%s>(zone=%d): act %ld, inae %ld lact %d lina %d\n", - str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive, - act, inact); -#endif - } } -EXPORT_SYMBOL_GPL(check_memclass); static void * mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) { - struct ckrm_mem_res *res, *pres; + ckrm_mem_res_t *res, *parres; if (mem_rcbs.resid == -1) { return NULL; } - pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res); - if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) { - printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n", - CKRM_MEM_MAX_HIERARCHY); + parres = ckrm_get_res_class(parent, mem_rcbs.resid, ckrm_mem_res_t); + if (parres && (parres->hier == CKRM_MEM_MAX_HIERARCHY)) { + // allows only upto CKRM_MEM_MAX_HIERARCHY return NULL; } @@ -216,23 +112,23 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) printk(KERN_ERR "MEM_RC: Only one root class is allowed\n"); return NULL; } - + if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) { - printk(KERN_ERR "MEM_RC: child class with no root class!!"); + printk(KERN_ERR "MEM_RC: creating child class without root class\n"); return NULL; } - - res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC); - + + res = kmalloc(sizeof(ckrm_mem_res_t), GFP_ATOMIC); + if (res) { mem_res_initcls_one(res); res->core = core; res->parent = parent; - spin_lock_irq(&ckrm_mem_lock); + spin_lock(&ckrm_mem_lock); list_add(&res->mcls_list, &ckrm_memclass_list); - spin_unlock_irq(&ckrm_mem_lock); + spin_unlock(&ckrm_mem_lock); if (parent == NULL) { - // I am part of the root class. So, set the max to + // I am part of the root class. So, set the max to // number of pages available res->pg_guar = ckrm_tot_lru_pages; res->pg_unused = ckrm_tot_lru_pages; @@ -240,17 +136,12 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) res->hier = 0; ckrm_mem_root_class = res; } else { - int guar; - res->hier = pres->hier + 1; - set_impl_guar_children(pres); - guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ? 
- pres->impl_guar : pres->pg_unused; - res->impl_guar = guar / pres->nr_dontcare; + res->hier = parres->hier + 1; } - ckrm_nr_mem_classes++; + mem_class_get(res); } else - printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n"); + printk(KERN_ERR "mem_res_alloc: failed GFP_ATOMIC alloc\n"); return res; } @@ -261,17 +152,17 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) * child is deleted this should be called after the child is removed. */ static void -child_maxlimit_changed_local(struct ckrm_mem_res *parres) +child_maxlimit_changed_local(ckrm_mem_res_t *parres) { int maxlimit = 0; - struct ckrm_mem_res *childres; + ckrm_mem_res_t *childres; ckrm_core_class_t *child = NULL; // run thru parent's children and get the new max_limit of the parent ckrm_lock_hier(parres->core); while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { childres = ckrm_get_res_class(child, mem_rcbs.resid, - struct ckrm_mem_res); + ckrm_mem_res_t); if (maxlimit < childres->shares.my_limit) { maxlimit = childres->shares.my_limit; } @@ -280,16 +171,47 @@ child_maxlimit_changed_local(struct ckrm_mem_res *parres) parres->shares.cur_max_limit = maxlimit; } +static void +mem_res_free(void *my_res) +{ + ckrm_mem_res_t *res = my_res; + ckrm_mem_res_t *parres; + + if (!res) + return; + + res->shares.my_guarantee = 0; + res->shares.my_limit = 0; + res->pg_guar = 0; + res->pg_limit = 0; + res->pg_unused = 0; + + parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, ckrm_mem_res_t); + // return child's limit/guarantee to parent node + if (parres) { + child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0); + child_maxlimit_changed_local(parres); + } + ckrm_mem_evaluate_all_pages(); + res->core = NULL; + + spin_lock(&ckrm_mem_lock); + list_del(&res->mcls_list); + spin_unlock(&ckrm_mem_lock); + mem_class_put(res); + return; +} + /* * Recalculate the guarantee and limit in # of pages... and propagate the * same to children. 
* Caller is responsible for protecting res and for the integrity of parres */ static void -recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres) +recalc_and_propagate(ckrm_mem_res_t * res, ckrm_mem_res_t * parres) { ckrm_core_class_t *child = NULL; - struct ckrm_mem_res *cres; + ckrm_mem_res_t *childres; int resid = mem_rcbs.resid; struct ckrm_shares *self = &res->shares; @@ -305,10 +227,8 @@ recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres) u64 temp = (u64) self->my_guarantee * parres->pg_guar; do_div(temp, par->total_guarantee); res->pg_guar = (int) temp; - res->impl_guar = CKRM_SHARE_DONTCARE; } else { res->pg_guar = 0; - res->impl_guar = CKRM_SHARE_DONTCARE; } if (parres->pg_limit == CKRM_SHARE_DONTCARE || @@ -337,112 +257,64 @@ recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres) // propagate to children ckrm_lock_hier(res->core); while ((child = ckrm_get_next_child(res->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - recalc_and_propagate(cres, res); + childres = ckrm_get_res_class(child, resid, ckrm_mem_res_t); + recalc_and_propagate(childres, res); } ckrm_unlock_hier(res->core); return; } -static void -mem_res_free(void *my_res) -{ - struct ckrm_mem_res *res = my_res; - struct ckrm_mem_res *pres; - - if (!res) - return; - - ckrm_mem_evaluate_all_pages(res); - - pres = ckrm_get_res_class(res->parent, mem_rcbs.resid, - struct ckrm_mem_res); - - if (pres) { - child_guarantee_changed(&pres->shares, - res->shares.my_guarantee, 0); - child_maxlimit_changed_local(pres); - recalc_and_propagate(pres, NULL); - set_impl_guar_children(pres); - } - - res->shares.my_guarantee = 0; - res->shares.my_limit = 0; - res->pg_guar = 0; - res->pg_limit = 0; - res->pg_unused = 0; - - spin_lock_irq(&ckrm_mem_lock); - list_del_init(&res->mcls_list); - spin_unlock_irq(&ckrm_mem_lock); - - res->core = NULL; - res->parent = NULL; - kref_put(&res->nr_users, memclass_release); - ckrm_nr_mem_classes--; - return; -} - static int mem_set_share_values(void *my_res, struct ckrm_shares *shares) { - struct ckrm_mem_res *res = my_res; - struct ckrm_mem_res *parres; - int rc; + ckrm_mem_res_t *res = my_res; + ckrm_mem_res_t *parres; + int rc = EINVAL; - if (!res) + if (!res) return -EINVAL; - parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, - struct ckrm_mem_res); + parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, ckrm_mem_res_t); rc = set_shares(shares, &res->shares, parres ? 
&parres->shares : NULL); if ((rc == 0) && (parres != NULL)) { child_maxlimit_changed_local(parres); recalc_and_propagate(parres, NULL); - set_impl_guar_children(parres); } - return rc; } static int mem_get_share_values(void *my_res, struct ckrm_shares *shares) { - struct ckrm_mem_res *res = my_res; + ckrm_mem_res_t *res = my_res; - if (!res) + if (!res) return -EINVAL; *shares = res->shares; return 0; } -static int +static int mem_get_stats(void *my_res, struct seq_file *sfile) { - struct ckrm_mem_res *res = my_res; - struct zone *zone; - int active = 0, inactive = 0, fr = 0; + ckrm_mem_res_t *res = my_res; - if (!res) + if (!res) return -EINVAL; - seq_printf(sfile, "--------- Memory Resource stats start ---------\n"); - if (res == ckrm_mem_root_class) { - int i = 0; - for_each_zone(zone) { - active += zone->nr_active; - inactive += zone->nr_inactive; - fr += zone->free_pages; - i++; - } - seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d" - ",free=%d\n", ckrm_tot_lru_pages, - active, inactive, fr); - } - seq_printf(sfile, "Number of pages used(including pages lent to" - " children): %d\n", atomic_read(&res->pg_total)); +#if 0 + seq_printf(sfile, "tot %6d;gua %6d;lmt %6d;unu %6d;" + "lnt %6d;bor %6d;rlt %6d\n", atomic_read(&res->pg_total), + res->pg_guar, res->pg_limit, res->pg_unused, res->pg_lent, + res->pg_borrowed, atomic_read(&ckrm_mem_real_count)); +#endif + + + seq_printf(sfile, "----------- Memory Resource stats start -----------\n"); + seq_printf(sfile, "Number of pages used(including pages lent to children):" + " %d\n", atomic_read(&res->pg_total)); seq_printf(sfile, "Number of pages guaranteed: %d\n", res->pg_guar); seq_printf(sfile, "Maximum limit of pages: %d\n", @@ -454,7 +326,7 @@ mem_get_stats(void *my_res, struct seq_file *sfile) res->pg_lent); seq_printf(sfile, "Number of pages borrowed from the parent: %d\n", res->pg_borrowed); - seq_printf(sfile, "---------- Memory Resource stats end ----------\n"); + seq_printf(sfile, "----------- Memory Resource stats end -----------\n"); return 0; } @@ -465,14 +337,14 @@ mem_change_resclass(void *tsk, void *old, void *new) struct mm_struct *mm; struct task_struct *task = tsk, *t1; struct ckrm_mem_res *prev_mmcls; - + if (!task->mm || (new == old) || (old == (void *) -1)) return; mm = task->active_mm; spin_lock(&mm->peertask_lock); prev_mmcls = mm->memclass; - + if (new == NULL) { list_del_init(&task->mm_peers); } else { @@ -490,130 +362,55 @@ mem_change_resclass(void *tsk, void *old, void *new) } spin_unlock(&mm->peertask_lock); - ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new); + ckrm_mem_evaluate_mm(mm); + /* + printk("chg_cls: task <%s:%d> mm %p oldmm %s newmm %s o %s n %s\n", + task->comm, task->pid, mm, prev_mmcls ? prev_mmcls->core->name: + "NULL", mm->memclass ? mm->memclass->core->name : "NULL", + o ? o->core->name: "NULL", n ? 
n->core->name: "NULL"); + */ return; } -#define MEM_FAIL_OVER "fail_over" -#define MEM_SHRINK_AT "shrink_at" -#define MEM_SHRINK_TO "shrink_to" -#define MEM_SHRINK_COUNT "num_shrinks" -#define MEM_SHRINK_INTERVAL "shrink_interval" - -int ckrm_mem_fail_over = 110; -int ckrm_mem_shrink_at = 90; -static int ckrm_mem_shrink_to = 80; -static int ckrm_mem_shrink_count = 10; -static int ckrm_mem_shrink_interval = 10; - -EXPORT_SYMBOL_GPL(ckrm_mem_fail_over); -EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at); - +// config file is available only at the root level, +// so assuming my_res to be the system level class static int -mem_show_config(void *my_res, struct seq_file *sfile) +mem_set_config(void *my_res, const char *cfgstr) { - struct ckrm_mem_res *res = my_res; - - if (!res) - return -EINVAL; - - seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n", - MEM_NAME, - MEM_FAIL_OVER, ckrm_mem_fail_over, - MEM_SHRINK_AT, ckrm_mem_shrink_at, - MEM_SHRINK_TO, ckrm_mem_shrink_to, - MEM_SHRINK_COUNT, ckrm_mem_shrink_count, - MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval); + ckrm_mem_res_t *res = my_res; + printk(KERN_INFO "%s class of %s is called with config<%s>\n", + MEM_NAME, res->core->name, cfgstr); return 0; } -// config file is available only at the root level, -// so assuming my_res to be the system level class -enum memclass_token { - mem_fail_over, - mem_shrink_at, - mem_shrink_to, - mem_shrink_count, - mem_shrink_interval, - mem_err -}; - -static match_table_t mem_tokens = { - {mem_fail_over, MEM_FAIL_OVER "=%d"}, - {mem_shrink_at, MEM_SHRINK_AT "=%d"}, - {mem_shrink_to, MEM_SHRINK_TO "=%d"}, - {mem_shrink_count, MEM_SHRINK_COUNT "=%d"}, - {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"}, - {mem_err, NULL}, -}; - -static int -mem_set_config(void *my_res, const char *cfgstr) +static int +mem_show_config(void *my_res, struct seq_file *sfile) { - char *p; - struct ckrm_mem_res *res = my_res; - int err = 0, val; + struct zone *zone; + ckrm_mem_res_t *res = my_res; + int active = 0, inactive = 0, fr = 0; if (!res) return -EINVAL; - while ((p = strsep((char**)&cfgstr, ",")) != NULL) { - substring_t args[MAX_OPT_ARGS]; - int token; - if (!*p) - continue; - - token = match_token(p, mem_tokens, args); - switch (token) { - case mem_fail_over: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_fail_over = val; - } - break; - case mem_shrink_at: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_at = val; - } - break; - case mem_shrink_to: - if (match_int(args, &val) || (val < 0) || (val > 100)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_to = val; - } - break; - case mem_shrink_count: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_count = val; - } - break; - case mem_shrink_interval: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_interval = val; - } - break; - default: - err = -EINVAL; - } + for_each_zone(zone) { + active += zone->nr_active; + inactive += zone->nr_inactive; + fr += zone->free_pages; } - return err; + seq_printf(sfile, "res=%s;tot_pages=%d,active=%d,inactive=%d,free=%d\n", + MEM_NAME, ckrm_tot_lru_pages,active,inactive,fr); + + + return 0; } static int mem_reset_stats(void *my_res) { - struct ckrm_mem_res *res = my_res; - printk(KERN_INFO "MEM_RC: reset stats called for class %s\n", - res->core->name); + ckrm_mem_res_t *res = my_res; + printk(KERN_INFO " memclass of %s called for reset\n", res->core->name); return 0; } @@ -632,7 
+429,7 @@ struct ckrm_res_ctlr mem_rcbs = { .reset_stats = mem_reset_stats, }; -EXPORT_SYMBOL_GPL(mem_rcbs); +EXPORT_SYMBOL(mem_rcbs); int __init init_ckrm_mem_res(void) @@ -641,7 +438,6 @@ init_ckrm_mem_res(void) int resid = mem_rcbs.resid; set_ckrm_tot_pages(); - spin_lock_init(&ckrm_mem_lock); clstype = ckrm_find_classtype_by_name("taskclass"); if (clstype == NULL) { printk(KERN_INFO " Unknown ckrm classtype"); @@ -655,7 +451,7 @@ init_ckrm_mem_res(void) } } return ((resid < 0) ? resid : 0); -} +} void __exit exit_ckrm_mem_res(void) @@ -667,229 +463,362 @@ exit_ckrm_mem_res(void) module_init(init_ckrm_mem_res) module_exit(exit_ckrm_mem_res) -int -ckrm_mem_get_shrink_to(void) +static void +set_flags_of_children(ckrm_mem_res_t *parres, unsigned int flag) +{ + ckrm_mem_res_t *childres; + ckrm_core_class_t *child = NULL; + + parres->reclaim_flags |= flag; + ckrm_lock_hier(parres->core); + while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { + childres = ckrm_get_res_class(child, mem_rcbs.resid, + ckrm_mem_res_t); + set_flags_of_children(childres, flag); + } + ckrm_unlock_hier(parres->core); + return; +} + +// FIXME: more attention is needed to this function +static unsigned int +set_usage_flags(ckrm_mem_res_t *res) +{ + int tot_usage, cls_usage, range, guar; + + if (res->pg_limit == CKRM_SHARE_DONTCARE) { + // No limit is set for the class. don't bother it + res->reclaim_flags = 0; + return res->reclaim_flags; + } + + tot_usage = atomic_read(&res->pg_total); + cls_usage = tot_usage - res->pg_lent; + guar = (res->pg_guar > 0) ? res->pg_guar : 0; + range = res->pg_limit - guar; + + if ((tot_usage > (guar + ((110 * range) / 100))) && + (res->pg_lent > (guar + ((25 * range) / 100)))) { + set_flags_of_children(res, CLS_PARENT_OVER); + } + + if (cls_usage > (guar + ((110 * range) / 100))) { + res->reclaim_flags |= CLS_OVER_110; + } else if (cls_usage > (guar + range)) { + res->reclaim_flags |= CLS_OVER_100; + } else if (cls_usage > (guar + ((3 * range) / 4))) { + res->reclaim_flags |= CLS_OVER_75; + } else if (cls_usage > (guar + (range / 2))) { + res->reclaim_flags |= CLS_OVER_50; + } else if (cls_usage > (guar + (range / 4))) { + res->reclaim_flags |= CLS_OVER_25; + } else if (cls_usage > guar) { + res->reclaim_flags |= CLS_OVER_GUAR; + } else { + res->reclaim_flags = 0; + } + return res->reclaim_flags; +} + +/* + * The functions ckrm_setup_reclamation(), ckrm_teardown_reclamation(), + * ckrm_get_reclaim_bits() and the macro ckrm_kick_page() along with the + * macros CLS_* define how the pages are reclaimed. + * Keeping this logic thru these interface eliminate the necessity to + * change the reclaimation code in VM if we want to change the logic. 
+ */ +unsigned int +ckrm_setup_reclamation(void) +{ + ckrm_mem_res_t *res; + unsigned int ret = 0; + + spin_lock(&ckrm_mem_lock); + set_ckrm_tot_pages(); + ckrm_mem_root_class->pg_guar = ckrm_tot_lru_pages; + ckrm_mem_root_class->pg_unused = ckrm_tot_lru_pages; + ckrm_mem_root_class->pg_limit = ckrm_tot_lru_pages; + recalc_and_propagate(ckrm_mem_root_class, NULL); + list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { + ret |= set_usage_flags(res); + } + spin_unlock(&ckrm_mem_lock); + return ret; +} + +void +ckrm_teardown_reclamation(void) +{ + ckrm_mem_res_t *res; + spin_lock(&ckrm_mem_lock); + list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { + res->reclaim_flags = 0; + } + spin_unlock(&ckrm_mem_lock); +} + +void +ckrm_get_reclaim_bits(unsigned int *flags, unsigned int *extract) { - return ckrm_mem_shrink_to; + int i, j, mask = 0; + + if (*flags == 0) { + *extract = 0; + return; + } + + if (*flags & CLS_SHRINK) { + *extract = CLS_SHRINK; + *flags = 0; + return; + } + + i = fls(*flags); + for (j = i-1; j > 0; j--) { + mask = (mask<<1) | 1; + } + *extract = (CLS_FLAGS_ALL & ~mask); + *flags &= ~*extract; + return; } void -ckrm_at_limit(struct ckrm_mem_res *cls) +ckrm_at_limit(ckrm_mem_res_t *cls) { +#ifndef AT_LIMIT_SUPPORT +#warning "ckrm_at_limit disabled due to problems with memory hog tests" +#else struct zone *zone; unsigned long now = jiffies; - if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) || + if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) || ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) { return; } - if ((cls->last_shrink > now) /* jiffies wrapped around */ || - (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) { + if ((cls->last_shrink + (10 * HZ)) < now) { // 10 seconds since last ? cls->last_shrink = now; cls->shrink_count = 0; } cls->shrink_count++; - if (cls->shrink_count > ckrm_mem_shrink_count) { + if (cls->shrink_count > 10) { return; } - spin_lock_irq(&ckrm_mem_lock); + spin_lock(&ckrm_mem_lock); list_add(&cls->shrink_list, &ckrm_shrink_list); - spin_unlock_irq(&ckrm_mem_lock); + spin_unlock(&ckrm_mem_lock); cls->flags |= MEM_AT_LIMIT; for_each_zone(zone) { wakeup_kswapd(zone); break; // only once is enough } +#endif // AT_LIMIT_SUPPORT } -static int +static int unmapped = 0, changed = 0, unchanged = 0, maxnull = 0, +anovma = 0, fnovma = 0; +static void ckrm_mem_evaluate_page_anon(struct page* page) { - struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls; - struct ckrm_mem_res* maxshareclass = NULL; + ckrm_mem_res_t* pgcls = page_class(page); + ckrm_mem_res_t* maxshareclass = NULL; struct anon_vma *anon_vma = (struct anon_vma *) page->mapping; struct vm_area_struct *vma; struct mm_struct* mm; - int ret = 0; + int v = 0; spin_lock(&anon_vma->lock); BUG_ON(list_empty(&anon_vma->head)); list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + v++; mm = vma->vm_mm; - if (!maxshareclass || ckrm_mem_share_compare(maxshareclass, - mm->memclass) < 0) { + if (!maxshareclass || + ckrm_mem_share_compare(maxshareclass, mm->memclass) < 0) { maxshareclass = mm->memclass; } } spin_unlock(&anon_vma->lock); + if (!v) + anovma++; - if (!maxshareclass) { - maxshareclass = ckrm_mem_root_class; - } - if (pgcls != maxshareclass) { + if (!maxshareclass) + maxnull++; + if (maxshareclass && (pgcls != maxshareclass)) { ckrm_change_page_class(page, maxshareclass); - ret = 1; - } - return ret; + changed++; + } else + unchanged++; + return; } -static int -ckrm_mem_evaluate_page_file(struct page* page) +static void +ckrm_mem_evaluate_page_file(struct page* 
page) { - struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls; - struct ckrm_mem_res* maxshareclass = NULL; + ckrm_mem_res_t* pgcls = page_class(page); + ckrm_mem_res_t* maxshareclass = NULL; struct address_space *mapping = page->mapping; struct vm_area_struct *vma = NULL; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct prio_tree_iter iter; struct mm_struct* mm; - int ret = 0; + int v = 0; if (!mapping) - return 0; + return; if (!spin_trylock(&mapping->i_mmap_lock)) - return 0; + return; - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, - pgoff, pgoff) { + while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap, + &iter, pgoff, pgoff)) != NULL) { + v++; mm = vma->vm_mm; - if (!maxshareclass || ckrm_mem_share_compare(maxshareclass, - mm->memclass)<0) + if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,mm->memclass)<0) maxshareclass = mm->memclass; } spin_unlock(&mapping->i_mmap_lock); - if (!maxshareclass) { - maxshareclass = ckrm_mem_root_class; - } - if (pgcls != maxshareclass) { + if (!v) + fnovma++; + if (!maxshareclass) + maxnull++; + + if (maxshareclass && pgcls != maxshareclass) { ckrm_change_page_class(page, maxshareclass); - ret = 1; - } - return ret; + changed++; + } else + unchanged++; + return; } -static int -ckrm_mem_evaluate_page(struct page* page) +static void +ckrm_mem_evaluate_page(struct page* page) { - int ret = 0; - BUG_ON(page->ckrm_zone == NULL); if (page->mapping) { if (PageAnon(page)) - ret = ckrm_mem_evaluate_page_anon(page); + ckrm_mem_evaluate_page_anon(page); else - ret = ckrm_mem_evaluate_page_file(page); - } - return ret; + ckrm_mem_evaluate_page_file(page); + } else + unmapped++; + return; } static void -ckrm_mem_evaluate_all_pages(struct ckrm_mem_res* res) +ckrm_mem_evaluate_all_pages() { struct page *page; - struct ckrm_zone *ckrm_zone; struct zone *zone; - struct list_head *pos, *next; - int i; + int active = 0, inactive = 0, cleared = 0; + int act_cnt, inact_cnt, idx; + ckrm_mem_res_t *res; + + spin_lock(&ckrm_mem_lock); + list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { + res->tmp_cnt = 0; + } + spin_unlock(&ckrm_mem_lock); - check_memclass(res, "bef_eval_all_pgs"); - for (i = 0; i < MAX_NR_ZONES; i++) { - ckrm_zone = &res->ckrm_zone[i]; - zone = ckrm_zone->zone; + for_each_zone(zone) { spin_lock_irq(&zone->lru_lock); - pos = ckrm_zone->inactive_list.next; - while (pos != &ckrm_zone->inactive_list) { - next = pos->next; - page = list_entry(pos, struct page, lru); - if (!ckrm_mem_evaluate_page(page)) - ckrm_change_page_class(page, - ckrm_mem_root_class); - pos = next; + list_for_each_entry(page, &zone->inactive_list, lru) { + ckrm_mem_evaluate_page(page); + active++; + page_class(page)->tmp_cnt++; + if (!test_bit(PG_ckrm_account, &page->flags)) + cleared++; } - pos = ckrm_zone->active_list.next; - while (pos != &ckrm_zone->active_list) { - next = pos->next; - page = list_entry(pos, struct page, lru); - if (!ckrm_mem_evaluate_page(page)) - ckrm_change_page_class(page, - ckrm_mem_root_class); - pos = next; + list_for_each_entry(page, &zone->active_list, lru) { + ckrm_mem_evaluate_page(page); + inactive++; + page_class(page)->tmp_cnt++; + if (!test_bit(PG_ckrm_account, &page->flags)) + cleared++; } spin_unlock_irq(&zone->lru_lock); } - check_memclass(res, "aft_eval_all_pgs"); + printk(KERN_DEBUG "all_pages: active %d inactive %d cleared %d\n", + active, inactive, cleared); + spin_lock(&ckrm_mem_lock); + list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { + act_cnt = 0; inact_cnt = 0; idx = 0; + 
for_each_zone(zone) {
+			act_cnt += res->nr_active[idx];
+			inact_cnt += res->nr_inactive[idx];
+			idx++;
+		}
+		printk(KERN_DEBUG "all_pages: %s: tmp_cnt %d; act_cnt %d inact_cnt %d\n",
+			res->core->name, res->tmp_cnt, act_cnt, inact_cnt);
+	}
+	spin_unlock(&ckrm_mem_lock);
+
+	// check all mm's in the system to see which memclass they are attached
+	// to.
 	return;
 }
 
-static inline int
+static /*inline*/ int
 class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma,
 		pmd_t* pmdir, unsigned long address, unsigned long end)
 {
-	pte_t *pte;
+	pte_t *pte, *orig_pte;
 	unsigned long pmd_end;
-	
+
 	if (pmd_none(*pmdir))
 		return 0;
 	BUG_ON(pmd_bad(*pmdir));
-	
+
+	orig_pte = pte = pte_offset_map(pmdir,address);
 	pmd_end = (address+PMD_SIZE)&PMD_MASK;
 	if (end>pmd_end)
 		end = pmd_end;
-	
+
 	do {
-		pte = pte_offset_map(pmdir,address);
 		if (pte_present(*pte)) {
-			struct page *page = pte_page(*pte);
 			BUG_ON(mm->memclass == NULL);
-			if (page->mapping && page->ckrm_zone) {
-				struct zone *zone = page->ckrm_zone->zone;
-				spin_lock_irq(&zone->lru_lock);
-				ckrm_change_page_class(page, mm->memclass);
-				spin_unlock_irq(&zone->lru_lock);
-			}
+			ckrm_change_page_class(pte_page(*pte), mm->memclass);
+			// ckrm_mem_evaluate_page(pte_page(*pte));
 		}
 		address += PAGE_SIZE;
-		pte_unmap(pte);
 		pte++;
 	} while(address && (address<end));
+	pte_unmap(orig_pte);
 	return 0;
 }
 
-static inline int
+static /*inline*/ int
 class_migrate_pgd(struct mm_struct* mm, struct vm_area_struct* vma,
 		pgd_t* pgdir, unsigned long address, unsigned long end)
 {
 	pmd_t* pmd;
 	unsigned long pgd_end;
 
 	if (pgd_none(*pgdir))
 		return 0;
 	BUG_ON(pgd_bad(*pgdir));
 
 	pmd = pmd_offset(pgdir,address);
 	pgd_end = (address+PGDIR_SIZE)&PGDIR_MASK;
 
 	if (end>pgd_end)
 		end = pgd_end;
-	
+
 	do {
 		class_migrate_pmd(mm,vma,pmd,address,end);
-		address = (address+PMD_SIZE)&PMD_MASK;
+		address = (address+PMD_SIZE)&PMD_MASK;
 		pmd++;
 	} while (address && (address<end));
 	return 0;
 }
 
-static inline void
+static /*inline*/ void
 class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
 {
 	pgd_t* pgdir;
 	unsigned long address, end;
 
 	address = vma->vm_start;
 	end = vma->vm_end;
-
+
 	pgdir = pgd_offset(vma->vm_mm, address);
 	do {
 		class_migrate_pgd(mm,vma,pgdir,address,end);
@@ -901,36 +830,34 @@ class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
 
 /* this function is called with mm->peertask_lock hold */
 void
-ckrm_mem_evaluate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
+ckrm_mem_evaluate_mm(struct mm_struct* mm)
 {
 	struct task_struct *task;
-	struct ckrm_mem_res *maxshareclass = def;
+	struct ckrm_mem_res *maxshareclass = NULL;
 	struct vm_area_struct *vma;
-	
+
 	if (list_empty(&mm->tasklist)) {
 		/* We leave the mm->memclass untouched since we believe that one
 		 * mm with no task associated will be deleted soon or attach
 		 * with another task later. */
-		return; 
+		return;
 	}
 
 	list_for_each_entry(task, &mm->tasklist, mm_peers) {
-		struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
+		ckrm_mem_res_t* cls = GET_MEM_CLASS(task);
 		if (!cls)
 			continue;
-		if (!maxshareclass ||
-				ckrm_mem_share_compare(maxshareclass,cls)<0 )
+		if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,cls)<0 )
 			maxshareclass = cls;
 	}
 
-	if (maxshareclass && (mm->memclass != maxshareclass)) {
-		if (mm->memclass) {
-			kref_put(&mm->memclass->nr_users, memclass_release);
-		}
+	if (maxshareclass && (mm->memclass != (void *)maxshareclass)) {
+		if (mm->memclass)
+			mem_class_put(mm->memclass);
 		mm->memclass = maxshareclass;
-		kref_get(&maxshareclass->nr_users);
-
+		mem_class_get(maxshareclass);
+
 		/* Go through all VMA to migrate pages */
 		down_read(&mm->mmap_sem);
 		vma = mm->mmap;
@@ -948,33 +875,29 @@ ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task)
 {
 	spin_lock(&mm->peertask_lock);
 	if (!list_empty(&task->mm_peers)) {
-		printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
+		printk(KERN_ERR "CKRM_MEM: Task list should be empty, but is not!!\n");
 		list_del_init(&task->mm_peers);
 	}
 	list_add_tail(&task->mm_peers, &mm->tasklist);
 	spin_unlock(&mm->peertask_lock);
-	if (mm->memclass != ckrm_get_mem_class(task))
-		ckrm_mem_evaluate_mm(mm, NULL);
+	if (mm->memclass != GET_MEM_CLASS(task))
+		ckrm_mem_evaluate_mm(mm);
 	return;
 }
 
 int
-ckrm_memclass_valid(struct ckrm_mem_res *cls)
+ckrm_memclass_valid(ckrm_mem_res_t *cls)
 {
-	struct ckrm_mem_res *tmp;
-	unsigned long flags;
+	ckrm_mem_res_t *tmp;
 
-	if (!cls || list_empty(&cls->mcls_list)) {
-		return 0;
-	}
-	spin_lock_irqsave(&ckrm_mem_lock, flags);
+	spin_lock(&ckrm_mem_lock);
 	list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
 		if (tmp == cls) {
 			spin_unlock(&ckrm_mem_lock);
 			return 1;
 		}
 	}
-	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+	spin_unlock(&ckrm_mem_lock);
 	return 0;
 }
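
A few stand-alone notes on the arithmetic this patch touches, for readers without the CKRM headers at hand. First, the hunks above drop set_impl_guar_children(), which handed every "don't care" child an equal slice of the parent's leftover guarantee: impl_guar = guar / nr_dontcare, where guar is the parent's own impl_guar when its pg_guar is CKRM_SHARE_DONTCARE and its pg_unused otherwise, and nr_dontcare counts the default class plus each child with no guarantee set. A minimal userspace sketch of that split; the helper name and the numbers in main() are illustrative, not from the kernel source:

#include <stdio.h>

#define CKRM_SHARE_DONTCARE (-1)

/* Sketch of the split done by set_impl_guar_children(): nr_dontcare is
 * at least 1 (the default class), and every don't-care consumer gets an
 * equal share of the parent's slack. */
static int impl_guar_per_child(int pg_guar, int impl_guar, int pg_unused,
			       int nr_dontcare)
{
	int guar = (pg_guar == CKRM_SHARE_DONTCARE) ? impl_guar : pg_unused;

	return guar / nr_dontcare;	/* integer division, remainder ignored */
}

int main(void)
{
	/* a parent with 9000 unused pages and 3 don't-care consumers */
	printf("%d\n", impl_guar_per_child(1000, 0, 9000, 3)); /* -> 3000 */
	return 0;
}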
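
Second, recalc_and_propagate() turns relative shares into absolute page counts: a child's pg_guar is the fraction my_guarantee/total_guarantee of the parent's pg_guar, and pg_limit is derived the same way from my_limit. The kernel performs the multiply in a u64 and divides with do_div() so the intermediate cannot overflow on 32-bit. A stand-alone sketch of that proportion, userspace C rather than kernel code, with an assumed example split in main():

#include <stdio.h>
#include <stdint.h>

#define CKRM_SHARE_DONTCARE (-1)

/* Sketch of the proportion used by recalc_and_propagate():
 * child_pages = (my_share / parent_total_shares) of parent_pages. */
static int share_to_pages(int my_share, int parent_pages, int parent_total)
{
	if (parent_pages == CKRM_SHARE_DONTCARE ||
	    my_share == CKRM_SHARE_DONTCARE)
		return CKRM_SHARE_DONTCARE;
	if (my_share <= 0 || parent_total <= 0)
		return 0;
	/* 64-bit intermediate, as the kernel's u64 + do_div() does */
	return (int)(((uint64_t)my_share * parent_pages) / parent_total);
}

int main(void)
{
	/* parent guarantees 100000 pages over total_guarantee 100;
	 * a child holding my_guarantee 25 is guaranteed 25000 pages */
	printf("%d\n", share_to_pages(25, 100000, 100));
	return 0;
}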
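
Third, set_usage_flags() (added above) bands a class by where its usage sits between guarantee and limit: with range = pg_limit - guar, the CLS_OVER_* cutoffs fall at guar, then guar plus 25%, 50%, 75%, 100% and 110% of range. For example, with guar = 1000 pages and pg_limit = 2000, the cutoffs land at 1000, 1250, 1500, 1750, 2000 and 2100 pages. A sketch of that ladder; the flag values here are placeholders, since the real CLS_* constants live in the CKRM headers, which this diff does not show:

#include <stdio.h>

/* Placeholder values for the CLS_OVER_* flags used by set_usage_flags() */
enum {
	CLS_OVER_GUAR = 1 << 0,
	CLS_OVER_25   = 1 << 1,
	CLS_OVER_50   = 1 << 2,
	CLS_OVER_75   = 1 << 3,
	CLS_OVER_100  = 1 << 4,
	CLS_OVER_110  = 1 << 5,
};

/* Mirror of the threshold ladder in set_usage_flags() */
static unsigned int usage_band(int cls_usage, int pg_guar, int pg_limit)
{
	int guar = (pg_guar > 0) ? pg_guar : 0;
	int range = pg_limit - guar;

	if (cls_usage > guar + (110 * range) / 100)
		return CLS_OVER_110;
	if (cls_usage > guar + range)
		return CLS_OVER_100;
	if (cls_usage > guar + (3 * range) / 4)
		return CLS_OVER_75;
	if (cls_usage > guar + range / 2)
		return CLS_OVER_50;
	if (cls_usage > guar + range / 4)
		return CLS_OVER_25;
	if (cls_usage > guar)
		return CLS_OVER_GUAR;
	return 0;
}

int main(void)
{
	/* guar 1000, limit 2000: 1600 pages is past the 50% cutoff (1500)
	 * but short of the 75% cutoff (1750) */
	printf("0x%x\n", usage_band(1600, 1000, 2000)); /* CLS_OVER_50 */
	return 0;
}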
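
Finally, ckrm_get_reclaim_bits() peels reclaim flags off in severity order: it takes fls() of the pending flags, builds a mask of every bit below the highest one set, and extracts the highest bit together with all positions above it, so repeated calls walk down the CLS_OVER_* ladder (a set CLS_SHRINK bit short-circuits the whole computation). A userspace rendering of the same bit manipulation, again with placeholder flag values and a local stand-in for the kernel's fls():

#include <stdio.h>

/* Placeholder flag layout; the real CLS_* values come from the CKRM
 * headers, which this diff does not show. */
#define CLS_OVER_GUAR	(1 << 0)
#define CLS_OVER_25	(1 << 1)
#define CLS_OVER_50	(1 << 2)
#define CLS_OVER_75	(1 << 3)
#define CLS_OVER_100	(1 << 4)
#define CLS_OVER_110	(1 << 5)
#define CLS_FLAGS_ALL	((1 << 6) - 1)

static int fls_sketch(unsigned int x)	/* userspace stand-in for fls() */
{
	int i = 0;

	while (x) {
		i++;
		x >>= 1;
	}
	return i;
}

/* Mirrors the extraction loop in ckrm_get_reclaim_bits(): mask covers
 * every bit below the highest set flag, so extract keeps that flag plus
 * all higher (more severe) positions, and clears them from *flags. */
static void get_reclaim_bits(unsigned int *flags, unsigned int *extract)
{
	unsigned int mask = 0;
	int i, j;

	if (*flags == 0) {
		*extract = 0;
		return;
	}
	i = fls_sketch(*flags);
	for (j = i - 1; j > 0; j--)
		mask = (mask << 1) | 1;
	*extract = CLS_FLAGS_ALL & ~mask;
	*flags &= ~*extract;
}

int main(void)
{
	unsigned int flags = CLS_OVER_110 | CLS_OVER_50, ex;

	get_reclaim_bits(&flags, &ex);	/* ex = 0x20: the CLS_OVER_110 band */
	printf("extract=0x%x remaining=0x%x\n", ex, flags);
	get_reclaim_bits(&flags, &ex);	/* ex = 0x3c: CLS_OVER_50 and above */
	printf("extract=0x%x remaining=0x%x\n", ex, flags);
	return 0;
}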