/* ckrm_mem.c - Memory Resource Manager for CKRM
 *
 * Copyright (C) Chandra Seetharaman, IBM Corp. 2004
 *
 * Provides a Memory Resource controller for CKRM
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/ckrm_mem_inline.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/errno.h>

#define MEM_NAME "mem"

#define CKRM_MEM_MAX_HIERARCHY 2 // allows a hierarchy depth of 2, i.e. levels 0, 1 and 2

/* all 1-level memory_share_class are chained together */
LIST_HEAD(ckrm_memclass_list);
LIST_HEAD(ckrm_shrink_list);
spinlock_t ckrm_mem_lock;        // protects both lists above
unsigned int ckrm_tot_lru_pages; // total # of pages in the system
                                 // currently doesn't handle memory add/remove
struct ckrm_mem_res *ckrm_mem_root_class;
atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *);
int ckrm_nr_mem_classes = 0;

EXPORT_SYMBOL_GPL(ckrm_memclass_list);
EXPORT_SYMBOL_GPL(ckrm_shrink_list);
EXPORT_SYMBOL_GPL(ckrm_mem_lock);
EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);

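/* Page accounting in a nutshell: each class keeps, per zone, its own
 * active/inactive LRU lists plus a handful of counters (all in pages):
 *   pg_guar   - guarantee handed down from the parent's shares
 *   pg_limit  - hard limit derived from the parent's shares
 *   pg_unused - portion of pg_guar not explicitly promised to children
 *   impl_guar - implicit guarantee given to a "don't care" class, i.e.
 *               the parent's pg_unused (or its own impl_guar) divided
 *               evenly among children that did not ask for a guarantee
 * CKRM_SHARE_DONTCARE means the value was never configured.
 */
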
/* Initialize rescls values
 * May be called on each rcfs unmount or as part of error recovery
 * to make share values sane.
 * Does not traverse hierarchy reinitializing children.
 */

void
memclass_release(struct kref *kref)
{
        struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users);

        /* a class must already be off ckrm_memclass_list by the time its
         * last reference is dropped */
        BUG_ON(ckrm_memclass_valid(cls));
        kfree(cls);
}
EXPORT_SYMBOL_GPL(memclass_release);

static void
set_ckrm_tot_pages(void)
{
        struct zone *zone;
        int tot_lru_pages = 0;

        for_each_zone(zone) {
                tot_lru_pages += zone->nr_active;
                tot_lru_pages += zone->nr_inactive;
                tot_lru_pages += zone->free_pages;
        }
        ckrm_tot_lru_pages = tot_lru_pages;
}

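/* Reset a class's shares and per-zone LRU lists to a sane initial state.
 * Everything starts out as "don't care"; real numbers are filled in by
 * mem_res_alloc()/recalc_and_propagate() once shares are known.
 */
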
static void
mem_res_initcls_one(struct ckrm_mem_res *res)
{
        int zindex = 0;
        struct zone *zone;

        memset(res, 0, sizeof(struct ckrm_mem_res));

        res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
        res->shares.my_limit = CKRM_SHARE_DONTCARE;
        res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
        res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        res->shares.cur_max_limit = 0;

        res->pg_guar = CKRM_SHARE_DONTCARE;
        res->pg_limit = CKRM_SHARE_DONTCARE;

        INIT_LIST_HEAD(&res->shrink_list);
        INIT_LIST_HEAD(&res->mcls_list);

        for_each_zone(zone) {
                INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
                INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
                INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
                res->ckrm_zone[zindex].nr_active = 0;
                res->ckrm_zone[zindex].nr_inactive = 0;
                res->ckrm_zone[zindex].zone = zone;
                res->ckrm_zone[zindex].memcls = res;
                zindex++;
        }

        res->nr_dontcare = 1; // for default class
        kref_init(&res->nr_users);
}

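/* Recompute the implicit guarantee of every "don't care" child of parres
 * and recurse down.  The pool to split is the parent's pg_unused (or its
 * own impl_guar if the parent itself is "don't care"), divided evenly
 * among the don't-care children plus the default class.  E.g. with
 * pg_unused = 600 and two don't-care children, nr_dontcare is 3 and each
 * gets an impl_guar of 200.
 */
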
static void
set_impl_guar_children(struct ckrm_mem_res *parres)
{
        ckrm_core_class_t *child = NULL;
        struct ckrm_mem_res *cres;
        int nr_dontcare = 1; // for defaultclass
        int guar, impl_guar;
        int resid = mem_rcbs.resid;

        ckrm_lock_hier(parres->core);
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
                // treat NULL cres as don't care, as that child is still
                // being created.
                // FIXME: need a better way to handle this case.
                if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
                        nr_dontcare++;
                }
        }

        parres->nr_dontcare = nr_dontcare;
        guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
                        parres->impl_guar : parres->pg_unused;
        impl_guar = guar / parres->nr_dontcare;

        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
                if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
                        cres->impl_guar = impl_guar;
                        set_impl_guar_children(cres);
                }
        }
        ckrm_unlock_hier(parres->core);
}

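/* Debug helper: walk the class's per-zone LRU lists and print the length
 * actually found on each list next to the recorded nr_active/nr_inactive
 * counters, so counter leaks show up as a mismatch.
 */
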
void
check_memclass(struct ckrm_mem_res *res, char *str)
{
        int i, act = 0, inact = 0;
        struct zone *zone;
        struct ckrm_zone *ckrm_zone;
        struct list_head *pos;
        struct page *page;

        printk("Check<%s> %s: total=%d\n",
                str, res->core->name, atomic_read(&res->pg_total));
        for (i = 0; i < MAX_NR_ZONES; i++) {
                act = inact = 0;
                ckrm_zone = &res->ckrm_zone[i];
                zone = ckrm_zone->zone;
                spin_lock_irq(&zone->lru_lock);
                pos = ckrm_zone->inactive_list.next;
                while (pos != &ckrm_zone->inactive_list) {
                        page = list_entry(pos, struct page, lru);
                        pos = pos->next;
                        inact++;
                }
                pos = ckrm_zone->active_list.next;
                while (pos != &ckrm_zone->active_list) {
                        page = list_entry(pos, struct page, lru);
                        pos = pos->next;
                        act++;
                }
                spin_unlock_irq(&zone->lru_lock);
                printk("Check<%s>(zone=%d): act %ld, inact %ld lact %d lina %d\n",
                        str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive,
                        act, inact);
        }
}
EXPORT_SYMBOL_GPL(check_memclass);

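/* Allocate and wire up a new memory class.  The root class owns all LRU
 * pages in the system; a child starts out "don't care" and picks up an
 * implicit guarantee from its parent's unused pool.
 */
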
static void *
mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
{
        struct ckrm_mem_res *res, *pres;

        if (mem_rcbs.resid == -1) {
                return NULL;
        }

        pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
        if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
                printk(KERN_ERR "MEM_RC: only allows hierarchy of %d\n",
                                CKRM_MEM_MAX_HIERARCHY);
                return NULL;
        }

        if (unlikely((parent == NULL) && (ckrm_mem_root_class != NULL))) {
                printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
                return NULL;
        }

        if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) {
                printk(KERN_ERR "MEM_RC: child class with no root class!!\n");
                return NULL;
        }

        res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
        if (res) {
                mem_res_initcls_one(res);
                res->core = core;
                res->parent = parent;
                spin_lock_irq(&ckrm_mem_lock);
                list_add(&res->mcls_list, &ckrm_memclass_list);
                spin_unlock_irq(&ckrm_mem_lock);
                if (parent == NULL) {
                        // I am part of the root class. So, set the max to
                        // number of pages available
                        res->pg_guar = ckrm_tot_lru_pages;
                        res->pg_unused = ckrm_tot_lru_pages;
                        res->pg_limit = ckrm_tot_lru_pages;
                        res->hier = 0;
                        ckrm_mem_root_class = res;
                } else {
                        int guar;

                        res->hier = pres->hier + 1;
                        set_impl_guar_children(pres);
                        guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
                                        pres->impl_guar : pres->pg_unused;
                        res->impl_guar = guar / pres->nr_dontcare;
                }
                ckrm_nr_mem_classes++;
        } else {
                printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
        }
        return res;
}

/*
 * It is the caller's responsibility to make sure that the parent only
 * has children that are to be accounted, i.e. if a new child is added
 * this function should be called after it has been added, and if a
 * child is deleted this should be called after the child is removed.
 */
static void
child_maxlimit_changed_local(struct ckrm_mem_res *parres)
{
        int maxlimit = 0;
        struct ckrm_mem_res *childres;
        ckrm_core_class_t *child = NULL;

        // run through parent's children and get the new max_limit of the parent
        ckrm_lock_hier(parres->core);
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                childres = ckrm_get_res_class(child, mem_rcbs.resid,
                                struct ckrm_mem_res);
                if (maxlimit < childres->shares.my_limit) {
                        maxlimit = childres->shares.my_limit;
                }
        }
        ckrm_unlock_hier(parres->core);
        parres->shares.cur_max_limit = maxlimit;
}

/*
 * Recalculate the guarantee and limit in # of pages... and propagate the
 * new values down to the children.
 * Caller is responsible for protecting res and for the integrity of parres.
 */
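/* The share-to-page conversion is a simple proportion.  For example, if a
 * class holds my_guarantee = 30 out of the parent's total_guarantee = 100
 * and the parent owns pg_guar = 1000 pages, the class is guaranteed
 * 30 * 1000 / 100 = 300 pages; pg_limit is derived the same way from
 * my_limit/max_limit, and pg_unused from unused_guarantee/total_guarantee.
 */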
static void
recalc_and_propagate(struct ckrm_mem_res *res, struct ckrm_mem_res *parres)
{
        ckrm_core_class_t *child = NULL;
        struct ckrm_mem_res *cres;
        int resid = mem_rcbs.resid;
        struct ckrm_shares *self = &res->shares;

        if (parres) {
                struct ckrm_shares *par = &parres->shares;

                // calculate pg_guar and pg_limit
                if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
                                self->my_guarantee == CKRM_SHARE_DONTCARE) {
                        res->pg_guar = CKRM_SHARE_DONTCARE;
                } else if (par->total_guarantee) {
                        u64 temp = (u64) self->my_guarantee * parres->pg_guar;
                        do_div(temp, par->total_guarantee);
                        res->pg_guar = (int) temp;
                        res->impl_guar = CKRM_SHARE_DONTCARE;
                } else {
                        res->pg_guar = 0;
                        res->impl_guar = CKRM_SHARE_DONTCARE;
                }

                if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
                                self->my_limit == CKRM_SHARE_DONTCARE) {
                        res->pg_limit = CKRM_SHARE_DONTCARE;
                } else if (par->max_limit) {
                        u64 temp = (u64) self->my_limit * parres->pg_limit;
                        do_div(temp, par->max_limit);
                        res->pg_limit = (int) temp;
                } else {
                        res->pg_limit = 0;
                }
        }

        // Calculate unused units
        if (res->pg_guar == CKRM_SHARE_DONTCARE) {
                res->pg_unused = CKRM_SHARE_DONTCARE;
        } else if (self->total_guarantee) {
                u64 temp = (u64) self->unused_guarantee * res->pg_guar;
                do_div(temp, self->total_guarantee);
                res->pg_unused = (int) temp;
        } else {
                res->pg_unused = 0;
        }

        // propagate to children
        ckrm_lock_hier(res->core);
        while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
                cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
                recalc_and_propagate(cres, res);
        }
        ckrm_unlock_hier(res->core);
}

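/* Tear a class down: give its pages back to whoever should own them,
 * return its guarantee to the parent, and drop it from the class list.
 * The final kref_put() frees the structure once the last user lets go.
 */
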
static void
mem_res_free(void *my_res)
{
        struct ckrm_mem_res *res = my_res;
        struct ckrm_mem_res *pres;

        if (!res)
                return;

        ckrm_mem_evaluate_all_pages(res);

        pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
                        struct ckrm_mem_res);

        if (pres) {
                child_guarantee_changed(&pres->shares,
                                res->shares.my_guarantee, 0);
                child_maxlimit_changed_local(pres);
                recalc_and_propagate(pres, NULL);
                set_impl_guar_children(pres);
        }

        res->shares.my_guarantee = 0;
        res->shares.my_limit = 0;

        spin_lock_irq(&ckrm_mem_lock);
        list_del_init(&res->mcls_list);
        spin_unlock_irq(&ckrm_mem_lock);

        kref_put(&res->nr_users, memclass_release);
        ckrm_nr_mem_classes--;
}

static int
mem_set_share_values(void *my_res, struct ckrm_shares *shares)
{
        struct ckrm_mem_res *res = my_res;
        struct ckrm_mem_res *parres;
        int rc;

        if (!res)
                return -EINVAL;

        parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
                        struct ckrm_mem_res);

        rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);

        if ((rc == 0) && (parres != NULL)) {
                child_maxlimit_changed_local(parres);
                recalc_and_propagate(parres, NULL);
                set_impl_guar_children(parres);
        }
        return rc;
}

static int
mem_get_share_values(void *my_res, struct ckrm_shares *shares)
{
        struct ckrm_mem_res *res = my_res;

        if (!res)
                return -EINVAL;
        *shares = res->shares;
        return 0;
}

static int
mem_get_stats(void *my_res, struct seq_file *sfile)
{
        struct ckrm_mem_res *res = my_res;
        struct zone *zone;
        int active = 0, inactive = 0, fr = 0;

        if (!res)
                return -EINVAL;

        seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
        if (res == ckrm_mem_root_class) {
                for_each_zone(zone) {
                        active += zone->nr_active;
                        inactive += zone->nr_inactive;
                        fr += zone->free_pages;
                }
                seq_printf(sfile, "System: tot_pages=%d,active=%d,inactive=%d"
                                ",free=%d\n", ckrm_tot_lru_pages,
                                active, inactive, fr);
        }
        seq_printf(sfile, "Number of pages used (including pages lent to"
                        " children): %d\n", atomic_read(&res->pg_total));
        seq_printf(sfile, "Number of pages guaranteed: %d\n",
                        res->pg_guar);
        seq_printf(sfile, "Maximum limit of pages: %d\n",
                        res->pg_limit);
        seq_printf(sfile, "Total number of pages available"
                        " (after serving guarantees to children): %d\n",
                        res->pg_unused);
        seq_printf(sfile, "Number of pages lent to children: %d\n",
                        res->pg_lent);
        seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
                        res->pg_borrowed);
        seq_printf(sfile, "---------- Memory Resource stats end ----------\n");

        return 0;
}

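/* Called by the CKRM core when a task moves between classes.  The mm is
 * shared by all threads, so rather than switching it immediately we
 * requeue the task on the mm's peer-task list and let
 * ckrm_mem_evaluate_mm() pick the class with the largest share.
 */
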
static void
mem_change_resclass(void *tsk, void *old, void *new)
{
        struct mm_struct *mm;
        struct task_struct *task = tsk, *t1;
        struct ckrm_mem_res *prev_mmcls;

        if (!task->mm || (new == old) || (old == (void *) -1))
                return;

        mm = task->active_mm;
        spin_lock(&mm->peertask_lock);
        prev_mmcls = mm->memclass;

        if (new == NULL) {
                list_del_init(&task->mm_peers);
        } else {
                int found = 0;

                list_for_each_entry(t1, &mm->tasklist, mm_peers) {
                        if (t1 == task) {
                                found = 1;
                                break;
                        }
                }
                if (!found) {
                        list_del_init(&task->mm_peers);
                        list_add_tail(&task->mm_peers, &mm->tasklist);
                }
        }

        spin_unlock(&mm->peertask_lock);
        ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new);
        return;
}

#define MEM_FAIL_OVER "fail_over"
#define MEM_SHRINK_AT "shrink_at"
#define MEM_SHRINK_TO "shrink_to"
#define MEM_SHRINK_COUNT "num_shrinks"
#define MEM_SHRINK_INTERVAL "shrink_interval"

int ckrm_mem_fail_over = 110;
int ckrm_mem_shrink_at = 90;
static int ckrm_mem_shrink_to = 80;
static int ckrm_mem_shrink_count = 10;
static int ckrm_mem_shrink_interval = 10;

EXPORT_SYMBOL_GPL(ckrm_mem_fail_over);
EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);

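/* Tunables, all set through the class's config file.  Judging by their
 * use here and in ckrm_mem_inline.h: fail_over and shrink_at are
 * percentages of a class's page limit (110 = allocations may overshoot
 * the limit by 10% before failing; 90 = start shrinking at 90% of the
 * limit), shrink_to is the percentage to shrink down to, and at most
 * num_shrinks shrink passes are attempted per shrink_interval seconds.
 */
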
static int
mem_show_config(void *my_res, struct seq_file *sfile)
{
        struct ckrm_mem_res *res = my_res;

        if (!res)
                return -EINVAL;

        seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
                MEM_NAME,
                MEM_FAIL_OVER, ckrm_mem_fail_over,
                MEM_SHRINK_AT, ckrm_mem_shrink_at,
                MEM_SHRINK_TO, ckrm_mem_shrink_to,
                MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
                MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
        return 0;
}

// config file is available only at the root level,
// so assuming my_res to be the system level class
enum memclass_token {
        mem_fail_over,
        mem_shrink_at,
        mem_shrink_to,
        mem_shrink_count,
        mem_shrink_interval,
        mem_err
};

static match_table_t mem_tokens = {
        {mem_fail_over, MEM_FAIL_OVER "=%d"},
        {mem_shrink_at, MEM_SHRINK_AT "=%d"},
        {mem_shrink_to, MEM_SHRINK_TO "=%d"},
        {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
        {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
        {mem_err, NULL},
};

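/* Parse a comma-separated "token=value" string, e.g. a write of
 * "shrink_at=85,shrink_to=75" to the class's rcfs config file (the exact
 * path depends on where rcfs is mounted).  Unknown tokens and
 * out-of-range values are rejected with -EINVAL.
 */
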
static int
mem_set_config(void *my_res, const char *cfgstr)
{
        char *p;
        struct ckrm_mem_res *res = my_res;
        int token, val;

        if (!res)
                return -EINVAL;

        while ((p = strsep((char **)&cfgstr, ",")) != NULL) {
                substring_t args[MAX_OPT_ARGS];

                if (!*p)
                        continue;

                token = match_token(p, mem_tokens, args);
                switch (token) {
                case mem_fail_over:
                        if (match_int(args, &val) || (val <= 0)) {
                                return -EINVAL;
                        }
                        ckrm_mem_fail_over = val;
                        break;
                case mem_shrink_at:
                        if (match_int(args, &val) || (val <= 0)) {
                                return -EINVAL;
                        }
                        ckrm_mem_shrink_at = val;
                        break;
                case mem_shrink_to:
                        if (match_int(args, &val) || (val < 0) || (val > 100)) {
                                return -EINVAL;
                        }
                        ckrm_mem_shrink_to = val;
                        break;
                case mem_shrink_count:
                        if (match_int(args, &val) || (val <= 0)) {
                                return -EINVAL;
                        }
                        ckrm_mem_shrink_count = val;
                        break;
                case mem_shrink_interval:
                        if (match_int(args, &val) || (val <= 0)) {
                                return -EINVAL;
                        }
                        ckrm_mem_shrink_interval = val;
                        break;
                default:
                        return -EINVAL;
                }
        }
        return 0;
}

static int
mem_reset_stats(void *my_res)
{
        struct ckrm_mem_res *res = my_res;

        printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
                res->core->name);
        return 0;
}

struct ckrm_res_ctlr mem_rcbs = {
        .res_name = MEM_NAME,
        .res_hdepth = CKRM_MEM_MAX_HIERARCHY,
        .resid = -1,
        .res_alloc = mem_res_alloc,
        .res_free = mem_res_free,
        .set_share_values = mem_set_share_values,
        .get_share_values = mem_get_share_values,
        .get_stats = mem_get_stats,
        .change_resclass = mem_change_resclass,
        .show_config = mem_show_config,
        .set_config = mem_set_config,
        .reset_stats = mem_reset_stats,
};

EXPORT_SYMBOL_GPL(mem_rcbs);

static int __init
init_ckrm_mem_res(void)
{
        struct ckrm_classtype *clstype;
        int resid = mem_rcbs.resid;

        set_ckrm_tot_pages();
        spin_lock_init(&ckrm_mem_lock);
        clstype = ckrm_find_classtype_by_name("taskclass");
        if (clstype == NULL) {
                printk(KERN_INFO " Unknown ckrm classtype<taskclass>\n");
                return -ENOENT;
        }

        if (resid == -1) {
                resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
                if (resid != -1)
                        mem_rcbs.classtype = clstype;
        }
        return ((resid < 0) ? resid : 0);
}

static void __exit
exit_ckrm_mem_res(void)
{
        ckrm_unregister_res_ctlr(&mem_rcbs);
        mem_rcbs.resid = -1;
}

module_init(init_ckrm_mem_res)
module_exit(exit_ckrm_mem_res)

int
ckrm_mem_get_shrink_to(void)
{
        return ckrm_mem_shrink_to;
}

void
ckrm_at_limit(struct ckrm_mem_res *cls)
{
        struct zone *zone;
        unsigned long now = jiffies;

        if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
                        ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) {
                return;
        }
        if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
                        (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
                cls->last_shrink = now;
                cls->shrink_count = 0;
        }
        cls->shrink_count++;
        if (cls->shrink_count > ckrm_mem_shrink_count) {
                return;
        }
        spin_lock_irq(&ckrm_mem_lock);
        list_add(&cls->shrink_list, &ckrm_shrink_list);
        spin_unlock_irq(&ckrm_mem_lock);
        cls->flags |= MEM_AT_LIMIT;
        for_each_zone(zone) {
                wakeup_kswapd(zone); // kick background reclaim (2.6-era single-arg form)
                break; // only once is enough
        }
}

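/* Decide which class an anonymous page should be charged to: walk every
 * vma on the page's anon_vma chain and pick the owning mm whose class
 * wins ckrm_mem_share_compare().  Returns nonzero if the page moved.
 */
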
static int
ckrm_mem_evaluate_page_anon(struct page *page)
{
        struct ckrm_mem_res *pgcls = page_ckrmzone(page)->memcls;
        struct ckrm_mem_res *maxshareclass = NULL;
        struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        int ret = 0;

        spin_lock(&anon_vma->lock);
        BUG_ON(list_empty(&anon_vma->head));
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
                mm = vma->vm_mm;
                if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
                                mm->memclass) < 0) {
                        maxshareclass = mm->memclass;
                }
        }
        spin_unlock(&anon_vma->lock);

        if (!maxshareclass) {
                maxshareclass = ckrm_mem_root_class;
        }
        if (pgcls != maxshareclass) {
                ckrm_change_page_class(page, maxshareclass);
                ret = 1;
        }
        return ret;
}

static int
ckrm_mem_evaluate_page_file(struct page *page)
{
        struct ckrm_mem_res *pgcls = page_ckrmzone(page)->memcls;
        struct ckrm_mem_res *maxshareclass = NULL;
        struct address_space *mapping = page->mapping;
        struct vm_area_struct *vma = NULL;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct prio_tree_iter iter;
        struct mm_struct *mm;
        int ret = 0;

        if (!mapping)
                return 0;

        if (!spin_trylock(&mapping->i_mmap_lock))
                return 0;

        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
                        pgoff, pgoff) {
                mm = vma->vm_mm;
                if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
                                mm->memclass) < 0)
                        maxshareclass = mm->memclass;
        }
        spin_unlock(&mapping->i_mmap_lock);

        if (!maxshareclass) {
                maxshareclass = ckrm_mem_root_class;
        }
        if (pgcls != maxshareclass) {
                ckrm_change_page_class(page, maxshareclass);
                ret = 1;
        }
        return ret;
}

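/* Dispatch: anonymous pages are judged by their anon_vma chain, file
 * pages by the i_mmap prio tree of their mapping.  A page with no
 * mapping is left where it is.
 */
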
static int
ckrm_mem_evaluate_page(struct page *page)
{
        int ret = 0;

        BUG_ON(page->ckrm_zone == NULL);
        if (page->mapping) {
                if (PageAnon(page))
                        ret = ckrm_mem_evaluate_page_anon(page);
                else
                        ret = ckrm_mem_evaluate_page_file(page);
        }
        return ret;
}

static void
ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *res)
{
        struct page *page;
        struct ckrm_zone *ckrm_zone;
        struct zone *zone;
        struct list_head *pos, *next;
        int i;

        check_memclass(res, "bef_eval_all_pgs");
        for (i = 0; i < MAX_NR_ZONES; i++) {
                ckrm_zone = &res->ckrm_zone[i];
                zone = ckrm_zone->zone;
                spin_lock_irq(&zone->lru_lock);
                pos = ckrm_zone->inactive_list.next;
                while (pos != &ckrm_zone->inactive_list) {
                        next = pos->next;
                        page = list_entry(pos, struct page, lru);
                        if (!ckrm_mem_evaluate_page(page))
                                ckrm_change_page_class(page,
                                                ckrm_mem_root_class);
                        pos = next;
                }
                pos = ckrm_zone->active_list.next;
                while (pos != &ckrm_zone->active_list) {
                        next = pos->next;
                        page = list_entry(pos, struct page, lru);
                        if (!ckrm_mem_evaluate_page(page))
                                ckrm_change_page_class(page,
                                                ckrm_mem_root_class);
                        pos = next;
                }
                spin_unlock_irq(&zone->lru_lock);
        }
        check_memclass(res, "aft_eval_all_pgs");
}

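/* The three helpers below do a classic page-table walk (pgd -> pmd ->
 * pte) over one vma.  Every present, mapped page that already sits on a
 * CKRM LRU list is recharged to mm->memclass under the zone's lru_lock.
 */
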
static void
class_migrate_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
                pmd_t *pmdir, unsigned long address, unsigned long end)
{
        pte_t *pte;
        unsigned long pmd_end;

        if (pmd_none(*pmdir))
                return;
        BUG_ON(pmd_bad(*pmdir));

        pmd_end = (address+PMD_SIZE)&PMD_MASK;
        if (end > pmd_end)
                end = pmd_end;

        do {
                pte = pte_offset_map(pmdir, address);
                if (pte_present(*pte)) {
                        struct page *page = pte_page(*pte);

                        BUG_ON(mm->memclass == NULL);
                        if (page->mapping && page->ckrm_zone) {
                                struct zone *zone = page->ckrm_zone->zone;

                                spin_lock_irq(&zone->lru_lock);
                                ckrm_change_page_class(page, mm->memclass);
                                spin_unlock_irq(&zone->lru_lock);
                        }
                }
                pte_unmap(pte);
                address += PAGE_SIZE;
        } while (address && (address < end));
}

static void
class_migrate_pgd(struct mm_struct *mm, struct vm_area_struct *vma,
                pgd_t *pgdir, unsigned long address, unsigned long end)
{
        pmd_t *pmd;
        unsigned long pgd_end;

        if (pgd_none(*pgdir))
                return;
        BUG_ON(pgd_bad(*pgdir));

        pmd = pmd_offset(pgdir, address);
        pgd_end = (address+PGDIR_SIZE)&PGDIR_MASK;

        if (pgd_end && (end > pgd_end))
                end = pgd_end;

        do {
                class_migrate_pmd(mm, vma, pmd, address, end);
                address = (address+PMD_SIZE)&PMD_MASK;
                pmd++;
        } while (address && (address < end));
}

static void
class_migrate_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
        pgd_t *pgdir;
        unsigned long address, end;

        address = vma->vm_start;
        end = vma->vm_end;

        do {
                pgdir = pgd_offset(vma->vm_mm, address);
                class_migrate_pgd(mm, vma, pgdir, address, end);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
        } while (address && (address < end));
}

/* this function is called with mm->peertask_lock held */
void
ckrm_mem_evaluate_mm(struct mm_struct *mm, struct ckrm_mem_res *def)
{
        struct task_struct *task;
        struct ckrm_mem_res *maxshareclass = def;
        struct vm_area_struct *vma;

        if (list_empty(&mm->tasklist)) {
                /* We leave the mm->memclass untouched since we believe that one
                 * mm with no task associated will be deleted soon or attach
                 * with another task later.
                 */
                return;
        }

        list_for_each_entry(task, &mm->tasklist, mm_peers) {
                struct ckrm_mem_res *cls = ckrm_get_mem_class(task);

                if (!maxshareclass ||
                                ckrm_mem_share_compare(maxshareclass, cls) < 0)
                        maxshareclass = cls;
        }

        if (maxshareclass && (mm->memclass != maxshareclass)) {
                if (mm->memclass)
                        kref_put(&mm->memclass->nr_users, memclass_release);
                mm->memclass = maxshareclass;
                kref_get(&maxshareclass->nr_users);

                /* Go through all VMA to migrate pages */
                down_read(&mm->mmap_sem);
                vma = mm->mmap;
                while (vma) {
                        class_migrate_vma(mm, vma);
                        vma = vma->vm_next;
                }
                up_read(&mm->mmap_sem);
        }
        return;
}

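/* Attach a task to its mm's peer-task list and re-evaluate which class
 * the mm should be charged to.  The error message fires if the task is
 * unexpectedly still on some list.
 */
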
void
ckrm_init_mm_to_task(struct mm_struct *mm, struct task_struct *task)
{
        spin_lock(&mm->peertask_lock);
        if (!list_empty(&task->mm_peers)) {
                printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
                list_del_init(&task->mm_peers);
        }
        list_add_tail(&task->mm_peers, &mm->tasklist);
        spin_unlock(&mm->peertask_lock);
        if (mm->memclass != ckrm_get_mem_class(task))
                ckrm_mem_evaluate_mm(mm, NULL);
        return;
}

int
ckrm_memclass_valid(struct ckrm_mem_res *cls)
{
        struct ckrm_mem_res *tmp;
        unsigned long flags;

        if (!cls || list_empty(&cls->mcls_list)) {
                return 0;
        }
        spin_lock_irqsave(&ckrm_mem_lock, flags);
        list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
                if (tmp == cls) {
                        spin_unlock_irqrestore(&ckrm_mem_lock, flags);
                        return 1;
                }
        }
        spin_unlock_irqrestore(&ckrm_mem_lock, flags);
        return 0;
}

MODULE_LICENSE("GPL");