/* ckrm_mem.c - Memory Resource Manager for CKRM
 *
 * Copyright (C) Chandra Seetharaman, IBM Corp. 2004
 *
 * Provides a Memory Resource controller for CKRM
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/ckrm_mem_inline.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/errno.h>

#define MEM_NAME "mem"

#define CKRM_MEM_MAX_HIERARCHY 2	// allows a hierarchy at most 2 deep, i.e. levels 0, 1 and 2

/* all first-level memory_share_class instances are chained together */
LIST_HEAD(ckrm_memclass_list);
LIST_HEAD(ckrm_shrink_list);
spinlock_t ckrm_mem_lock;		// protects both lists above
unsigned int ckrm_tot_lru_pages;	// total # of pages in the system;
					// currently doesn't handle memory add/remove
struct ckrm_mem_res *ckrm_mem_root_class;
atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *);
int ckrm_nr_mem_classes = 0;

EXPORT_SYMBOL_GPL(ckrm_memclass_list);
EXPORT_SYMBOL_GPL(ckrm_shrink_list);
EXPORT_SYMBOL_GPL(ckrm_mem_lock);
EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);

/* Initialize rescls values
 * May be called on each rcfs unmount or as part of error recovery
 * to make share values sane.
 * Does not traverse hierarchy reinitializing children.
 */

void
memclass_release(struct kref *kref)
{
	struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users);

	BUG_ON(ckrm_memclass_valid(cls));
	kfree(cls);
}
EXPORT_SYMBOL_GPL(memclass_release);

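/*
 * Compute the system-wide page count that shares are calculated
 * against: the sum of active, inactive and free pages across all
 * zones (the same fields mem_get_stats() reports below).
 */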
static void
set_ckrm_tot_pages(void)
{
	struct zone *zone;
	int tot_lru_pages = 0;

	for_each_zone(zone) {
		tot_lru_pages += zone->nr_active;
		tot_lru_pages += zone->nr_inactive;
		tot_lru_pages += zone->free_pages;
	}
	ckrm_tot_lru_pages = tot_lru_pages;
}

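/*
 * Set a freshly allocated class to sane defaults: "don't care"
 * guarantees/limits, empty shrink and class lists, and one empty
 * per-zone LRU (active/inactive/victim) triplet per system zone.
 */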
static void
mem_res_initcls_one(struct ckrm_mem_res *res)
{
	int zindex = 0;
	struct zone *zone;

	memset(res, 0, sizeof(struct ckrm_mem_res));

	res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
	res->shares.my_limit = CKRM_SHARE_DONTCARE;
	res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
	res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
	res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
	res->shares.cur_max_limit = 0;

	res->pg_guar = CKRM_SHARE_DONTCARE;
	res->pg_limit = CKRM_SHARE_DONTCARE;

	INIT_LIST_HEAD(&res->shrink_list);
	INIT_LIST_HEAD(&res->mcls_list);

	for_each_zone(zone) {
		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
		INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
		INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
		res->ckrm_zone[zindex].nr_active = 0;
		res->ckrm_zone[zindex].nr_inactive = 0;
		res->ckrm_zone[zindex].zone = zone;
		res->ckrm_zone[zindex].memcls = res;
		zindex++;
	}

	res->nr_dontcare = 1;	// for default class
	kref_init(&res->nr_users);
}

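/*
 * Distribute an implicit guarantee to every child whose guarantee is
 * "don't care": each such child (plus the default class) gets an equal
 * slice of the parent's unclaimed pages.
 *
 * Example (illustrative numbers): a parent with pg_unused = 300 pages
 * and two don't-care children has nr_dontcare = 3 (two children plus
 * the default class), so each receives impl_guar = 300 / 3 = 100 pages.
 */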
static void
set_impl_guar_children(struct ckrm_mem_res *parres)
{
	ckrm_core_class_t *child = NULL;
	struct ckrm_mem_res *cres;
	int nr_dontcare = 1;	// for default class
	int guar, impl_guar;
	int resid = mem_rcbs.resid;

	ckrm_lock_hier(parres->core);
	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
		// treat NULL cres as don't care as that child is just being
		// created.
		// FIXME: need a better way to handle this case.
		if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
			nr_dontcare++;
		}
	}

	parres->nr_dontcare = nr_dontcare;
	guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
			parres->impl_guar : parres->pg_unused;
	impl_guar = guar / parres->nr_dontcare;

	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
		if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
			cres->impl_guar = impl_guar;
			set_impl_guar_children(cres);
		}
	}
	ckrm_unlock_hier(parres->core);
}

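/*
 * Debugging aid: walk both per-class LRU lists of every zone and
 * compare the pages actually found ("lact"/"lina") against the
 * class's nr_active/nr_inactive bookkeeping.
 */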
void
check_memclass(struct ckrm_mem_res *res, char *str)
{
	int i, act = 0, inact = 0;
	struct zone *zone;
	struct ckrm_zone *ckrm_zone;
	struct list_head *pos;
	struct page *page;

	printk("Check<%s> %s: total=%d\n",
		str, res->core->name, atomic_read(&res->pg_total));
	for (i = 0; i < MAX_NR_ZONES; i++) {
		act = 0; inact = 0;
		ckrm_zone = &res->ckrm_zone[i];
		zone = ckrm_zone->zone;
		spin_lock_irq(&zone->lru_lock);
		pos = ckrm_zone->inactive_list.next;
		while (pos != &ckrm_zone->inactive_list) {
			page = list_entry(pos, struct page, lru);
			pos = pos->next;
			inact++;
		}
		pos = ckrm_zone->active_list.next;
		while (pos != &ckrm_zone->active_list) {
			page = list_entry(pos, struct page, lru);
			pos = pos->next;
			act++;
		}
		spin_unlock_irq(&zone->lru_lock);
		printk("Check<%s>(zone=%d): act %ld, inact %ld lact %d lina %d\n",
			str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive,
			act, inact);
	}
}
EXPORT_SYMBOL_GPL(check_memclass);

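/*
 * Allocate and wire up one memory class. The root class (parent ==
 * NULL) is created exactly once and owns every LRU page in the
 * system; a child starts with an implicit guarantee carved out of
 * its parent's unused pages.
 */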
static void *
mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
{
	struct ckrm_mem_res *res, *pres;

	if (mem_rcbs.resid == -1) {
		return NULL;
	}

	pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
	if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
		printk(KERN_ERR "MEM_RC: only allows hierarchy of %d\n",
				CKRM_MEM_MAX_HIERARCHY);
		return NULL;
	}

	if (unlikely((parent == NULL) && (ckrm_mem_root_class != NULL))) {
		printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
		return NULL;
	}

	if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) {
		printk(KERN_ERR "MEM_RC: child class with no root class!!\n");
		return NULL;
	}

	res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);

	if (res) {
		mem_res_initcls_one(res);
		res->core = core;
		res->parent = parent;
		spin_lock_irq(&ckrm_mem_lock);
		list_add(&res->mcls_list, &ckrm_memclass_list);
		spin_unlock_irq(&ckrm_mem_lock);
		if (parent == NULL) {
			// I am part of the root class. So, set the max to
			// number of pages available
			res->pg_guar = ckrm_tot_lru_pages;
			res->pg_unused = ckrm_tot_lru_pages;
			res->pg_limit = ckrm_tot_lru_pages;
			res->hier = 0;
			ckrm_mem_root_class = res;
		} else {
			int guar;

			res->hier = pres->hier + 1;
			set_impl_guar_children(pres);
			guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
					pres->impl_guar : pres->pg_unused;
			res->impl_guar = guar / pres->nr_dontcare;
		}
		ckrm_nr_mem_classes++;
	} else
		printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");

	return res;
}

/*
 * It is the caller's responsibility to make sure that the parent only
 * has children that are to be accounted, i.e., if a new child is added
 * this function should be called after it has been added, and if a
 * child is deleted this should be called after the child is removed.
 */
static void
child_maxlimit_changed_local(struct ckrm_mem_res *parres)
{
	int maxlimit = 0;
	struct ckrm_mem_res *childres;
	ckrm_core_class_t *child = NULL;

	// run through the parent's children and recompute the parent's
	// cur_max_limit as the largest my_limit among them
	ckrm_lock_hier(parres->core);
	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
		childres = ckrm_get_res_class(child, mem_rcbs.resid,
				struct ckrm_mem_res);
		if (childres && (maxlimit < childres->shares.my_limit)) {
			maxlimit = childres->shares.my_limit;
		}
	}
	ckrm_unlock_hier(parres->core);
	parres->shares.cur_max_limit = maxlimit;
}

/*
 * Recalculate the guarantee and limit in # of pages, and propagate the
 * new values down to the children.
 * Caller is responsible for protecting res and for the integrity of parres.
 */
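/*
 * Example (illustrative numbers): if the parent controls pg_guar =
 * 1000 pages, par->total_guarantee = 100 and this class holds
 * my_guarantee = 25, then pg_guar = 25 * 1000 / 100 = 250 pages.
 * The limit is scaled the same way against par->max_limit.
 */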
static void
recalc_and_propagate(struct ckrm_mem_res *res, struct ckrm_mem_res *parres)
{
	ckrm_core_class_t *child = NULL;
	struct ckrm_mem_res *cres;
	int resid = mem_rcbs.resid;
	struct ckrm_shares *self = &res->shares;

	if (parres) {
		struct ckrm_shares *par = &parres->shares;

		// calculate pg_guar and pg_limit
		if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
				self->my_guarantee == CKRM_SHARE_DONTCARE) {
			res->pg_guar = CKRM_SHARE_DONTCARE;
		} else if (par->total_guarantee) {
			u64 temp = (u64) self->my_guarantee * parres->pg_guar;
			do_div(temp, par->total_guarantee);
			res->pg_guar = (int) temp;
			res->impl_guar = CKRM_SHARE_DONTCARE;
		} else {
			res->pg_guar = 0;
			res->impl_guar = CKRM_SHARE_DONTCARE;
		}

		if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
				self->my_limit == CKRM_SHARE_DONTCARE) {
			res->pg_limit = CKRM_SHARE_DONTCARE;
		} else if (par->max_limit) {
			u64 temp = (u64) self->my_limit * parres->pg_limit;
			do_div(temp, par->max_limit);
			res->pg_limit = (int) temp;
		} else {
			res->pg_limit = 0;
		}
	}

	// Calculate unused units
	if (res->pg_guar == CKRM_SHARE_DONTCARE) {
		res->pg_unused = CKRM_SHARE_DONTCARE;
	} else if (self->total_guarantee) {
		u64 temp = (u64) self->unused_guarantee * res->pg_guar;
		do_div(temp, self->total_guarantee);
		res->pg_unused = (int) temp;
	} else {
		res->pg_unused = 0;
	}

	// propagate to children
	ckrm_lock_hier(res->core);
	while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
		recalc_and_propagate(cres, res);
	}
	ckrm_unlock_hier(res->core);
}

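/*
 * Tear down a class: push its pages back to whichever classes still
 * reference them (falling back to the root class), return its
 * guarantee to the parent, unlink it and drop the final reference.
 */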
static void
mem_res_free(void *my_res)
{
	struct ckrm_mem_res *res = my_res;
	struct ckrm_mem_res *pres;

	if (!res)
		return;

	ckrm_mem_evaluate_all_pages(res);

	pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
			struct ckrm_mem_res);

	if (pres) {
		child_guarantee_changed(&pres->shares,
				res->shares.my_guarantee, 0);
		child_maxlimit_changed_local(pres);
		recalc_and_propagate(pres, NULL);
		set_impl_guar_children(pres);
	}

	res->shares.my_guarantee = 0;
	res->shares.my_limit = 0;

	spin_lock_irq(&ckrm_mem_lock);
	list_del_init(&res->mcls_list);
	spin_unlock_irq(&ckrm_mem_lock);

	kref_put(&res->nr_users, memclass_release);
	ckrm_nr_mem_classes--;
}

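/*
 * rcfs "shares" write hook: validate and store the new share values,
 * then recompute page guarantees/limits across the parent's subtree.
 */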
static int
mem_set_share_values(void *my_res, struct ckrm_shares *shares)
{
	struct ckrm_mem_res *res = my_res;
	struct ckrm_mem_res *parres;
	int rc;

	if (!res)
		return -EINVAL;

	parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
			struct ckrm_mem_res);

	rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);

	if ((rc == 0) && (parres != NULL)) {
		child_maxlimit_changed_local(parres);
		recalc_and_propagate(parres, NULL);
		set_impl_guar_children(parres);
	}
	return rc;
}

static int
mem_get_share_values(void *my_res, struct ckrm_shares *shares)
{
	struct ckrm_mem_res *res = my_res;

	if (!res)
		return -EINVAL;
	*shares = res->shares;
	return 0;
}

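/*
 * rcfs "stats" read hook. For the root class this also dumps the
 * system-wide active/inactive/free page counts.
 */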
static int
mem_get_stats(void *my_res, struct seq_file *sfile)
{
	struct ckrm_mem_res *res = my_res;
	struct zone *zone;
	int active = 0, inactive = 0, fr = 0;

	if (!res)
		return -EINVAL;

	seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
	if (res == ckrm_mem_root_class) {
		for_each_zone(zone) {
			active += zone->nr_active;
			inactive += zone->nr_inactive;
			fr += zone->free_pages;
		}
		seq_printf(sfile, "System: tot_pages=%d,active=%d,inactive=%d"
				",free=%d\n", ckrm_tot_lru_pages,
				active, inactive, fr);
	}
	seq_printf(sfile, "Number of pages used(including pages lent to"
			" children): %d\n", atomic_read(&res->pg_total));
	seq_printf(sfile, "Number of pages guaranteed: %d\n",
			res->pg_guar);
	seq_printf(sfile, "Maximum limit of pages: %d\n",
			res->pg_limit);
	seq_printf(sfile, "Total number of pages available"
			" (after serving guarantees to children): %d\n",
			res->pg_unused);
	seq_printf(sfile, "Number of pages lent to children: %d\n",
			res->pg_lent);		/* assumed field name */
	seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
			res->pg_borrowed);	/* assumed field name */
	seq_printf(sfile, "---------- Memory Resource stats end ----------\n");

	return 0;
}

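/*
 * CKRM callback invoked when a task moves between classes: keep the
 * task on its mm's peer list and re-evaluate which class the mm (and
 * therefore its pages) should be charged to.
 */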
static void
mem_change_resclass(void *tsk, void *old, void *new)
{
	struct mm_struct *mm;
	struct task_struct *task = tsk, *t1;
	struct ckrm_mem_res *prev_mmcls;

	if (!task->mm || (new == old) || (old == (void *) -1))
		return;

	mm = task->active_mm;
	spin_lock(&mm->peertask_lock);
	prev_mmcls = mm->memclass;

	if (new == NULL) {
		list_del_init(&task->mm_peers);
	} else {
		int found = 0;

		// if the task is already on the mm's peer list, leave it
		// where it is; otherwise move it to the tail
		list_for_each_entry(t1, &mm->tasklist, mm_peers) {
			if (t1 == task) {
				found = 1;
				break;
			}
		}
		if (!found) {
			list_del_init(&task->mm_peers);
			list_add_tail(&task->mm_peers, &mm->tasklist);
		}
	}

	spin_unlock(&mm->peertask_lock);
	ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new);
}

#define MEM_FAIL_OVER "fail_over"
#define MEM_SHRINK_AT "shrink_at"
#define MEM_SHRINK_TO "shrink_to"
#define MEM_SHRINK_COUNT "num_shrinks"
#define MEM_SHRINK_INTERVAL "shrink_interval"

int ckrm_mem_fail_over = 110;
int ckrm_mem_shrink_at = 90;
static int ckrm_mem_shrink_to = 80;
static int ckrm_mem_shrink_count = 10;
static int ckrm_mem_shrink_interval = 10;

EXPORT_SYMBOL_GPL(ckrm_mem_fail_over);
EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);

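/*
 * Tunables. fail_over/shrink_at/shrink_to appear to be percentages of
 * a class's page limit, while num_shrinks and shrink_interval bound
 * how often shrinking may run: with the defaults, allocations fail at
 * 110% of the limit, shrinking starts at 90% and targets 80%, with at
 * most 10 shrinks per 10-second interval. Semantics inferred from the
 * names, the defaults and ckrm_at_limit() below.
 */
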
static int
mem_show_config(void *my_res, struct seq_file *sfile)
{
	struct ckrm_mem_res *res = my_res;

	if (!res)
		return -EINVAL;

	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
		MEM_NAME,
		MEM_FAIL_OVER, ckrm_mem_fail_over,
		MEM_SHRINK_AT, ckrm_mem_shrink_at,
		MEM_SHRINK_TO, ckrm_mem_shrink_to,
		MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
		MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);

	return 0;
}

// config file is available only at the root level,
// so assume my_res to be the system (root) level class
enum memclass_token {
	mem_fail_over,
	mem_shrink_at,
	mem_shrink_to,
	mem_shrink_count,
	mem_shrink_interval,
	mem_err
};

static match_table_t mem_tokens = {
	{mem_fail_over, MEM_FAIL_OVER "=%d"},
	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
	{mem_err, NULL},
};

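/*
 * mem_set_config() parses a comma-separated option string, e.g.
 * (illustrative): "fail_over=120,shrink_at=85,shrink_to=70".
 * Each value must be a positive integer (shrink_to: 0..100).
 */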
static int
mem_set_config(void *my_res, const char *cfgstr)
{
	char *p;
	struct ckrm_mem_res *res = my_res;
	int err = 0, val;

	if (!res)
		return -EINVAL;

	while ((p = strsep((char **)&cfgstr, ",")) != NULL) {
		substring_t args[MAX_OPT_ARGS];
		int token;

		if (!*p)
			continue;

		token = match_token(p, mem_tokens, args);
		switch (token) {
		case mem_fail_over:
			if (match_int(args, &val) || (val <= 0)) {
				err = -EINVAL;
			} else {
				ckrm_mem_fail_over = val;
			}
			break;
		case mem_shrink_at:
			if (match_int(args, &val) || (val <= 0)) {
				err = -EINVAL;
			} else {
				ckrm_mem_shrink_at = val;
			}
			break;
		case mem_shrink_to:
			if (match_int(args, &val) || (val < 0) || (val > 100)) {
				err = -EINVAL;
			} else {
				ckrm_mem_shrink_to = val;
			}
			break;
		case mem_shrink_count:
			if (match_int(args, &val) || (val <= 0)) {
				err = -EINVAL;
			} else {
				ckrm_mem_shrink_count = val;
			}
			break;
		case mem_shrink_interval:
			if (match_int(args, &val) || (val <= 0)) {
				err = -EINVAL;
			} else {
				ckrm_mem_shrink_interval = val;
			}
			break;
		default:
			err = -EINVAL;
			break;
		}
	}
	return err;
}

static int
mem_reset_stats(void *my_res)
{
	struct ckrm_mem_res *res = my_res;

	printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
			res->core->name);
	return 0;
}

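/*
 * Resource controller callbacks registered with the CKRM core;
 * .resid stays -1 until ckrm_register_res_ctlr() assigns a real id.
 */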
struct ckrm_res_ctlr mem_rcbs = {
	.res_name = MEM_NAME,
	.res_hdepth = CKRM_MEM_MAX_HIERARCHY,
	.resid = -1,
	.res_alloc = mem_res_alloc,
	.res_free = mem_res_free,
	.set_share_values = mem_set_share_values,
	.get_share_values = mem_get_share_values,
	.get_stats = mem_get_stats,
	.change_resclass = mem_change_resclass,
	.show_config = mem_show_config,
	.set_config = mem_set_config,
	.reset_stats = mem_reset_stats,
};

EXPORT_SYMBOL_GPL(mem_rcbs);

int __init
init_ckrm_mem_res(void)
{
	struct ckrm_classtype *clstype;
	int resid = mem_rcbs.resid;

	set_ckrm_tot_pages();
	spin_lock_init(&ckrm_mem_lock);
	clstype = ckrm_find_classtype_by_name("taskclass");
	if (clstype == NULL) {
		printk(KERN_INFO " Unknown ckrm classtype<taskclass>\n");
		return -ENOENT;
	}

	if (resid == -1) {
		resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
		if (resid != -1)
			mem_rcbs.classtype = clstype;
	}
	return ((resid < 0) ? resid : 0);
}

void __exit
exit_ckrm_mem_res(void)
{
	ckrm_unregister_res_ctlr(&mem_rcbs);
	mem_rcbs.resid = -1;
}

module_init(init_ckrm_mem_res)
module_exit(exit_ckrm_mem_res)

int
ckrm_mem_get_shrink_to(void)
{
	return ckrm_mem_shrink_to;
}

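/*
 * Called when a class hits its page limit: rate-limit shrink attempts
 * (num_shrinks per shrink_interval seconds), then queue the class on
 * ckrm_shrink_list, flag it MEM_AT_LIMIT and kick reclaim.
 */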
void
ckrm_at_limit(struct ckrm_mem_res *cls)
{
	struct zone *zone;
	unsigned long now = jiffies;

	if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
			((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) {
		return;
	}
	if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
			(cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
		cls->last_shrink = now;
		cls->shrink_count = 0;
	}
	cls->shrink_count++;
	if (cls->shrink_count > ckrm_mem_shrink_count) {
		return;
	}
	spin_lock_irq(&ckrm_mem_lock);
	list_add(&cls->shrink_list, &ckrm_shrink_list);
	spin_unlock_irq(&ckrm_mem_lock);
	cls->flags |= MEM_AT_LIMIT;
	for_each_zone(zone) {
		wakeup_kswapd(zone);	/* assumed call; elided in original */
		break;			// only once is enough
	}
}

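/*
 * Charge an anonymous page to the "largest" class among the classes
 * of all mm's mapping it (ckrm_mem_share_compare() defines the
 * ordering); fall back to the root class if none is found.
 */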
static int
ckrm_mem_evaluate_page_anon(struct page *page)
{
	struct ckrm_mem_res *pgcls = page_ckrmzone(page)->memcls;
	struct ckrm_mem_res *maxshareclass = NULL;
	struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
	struct vm_area_struct *vma;
	struct mm_struct *mm;

	spin_lock(&anon_vma->lock);
	BUG_ON(list_empty(&anon_vma->head));
	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
		mm = vma->vm_mm;
		if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
				mm->memclass) < 0) {
			maxshareclass = mm->memclass;
		}
	}
	spin_unlock(&anon_vma->lock);

	if (!maxshareclass) {
		maxshareclass = ckrm_mem_root_class;
	}
	if (pgcls != maxshareclass) {
		ckrm_change_page_class(page, maxshareclass);
	}
	return 1;
}

static int
ckrm_mem_evaluate_page_file(struct page *page)
{
	struct ckrm_mem_res *pgcls = page_ckrmzone(page)->memcls;
	struct ckrm_mem_res *maxshareclass = NULL;
	struct address_space *mapping = page->mapping;
	struct vm_area_struct *vma = NULL;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct prio_tree_iter iter;
	struct mm_struct *mm;

	if (!mapping)
		return 0;

	if (!spin_trylock(&mapping->i_mmap_lock))
		return 0;

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
			pgoff, pgoff) {
		mm = vma->vm_mm;
		if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
				mm->memclass) < 0) {
			maxshareclass = mm->memclass;
		}
	}
	spin_unlock(&mapping->i_mmap_lock);

	if (!maxshareclass) {
		maxshareclass = ckrm_mem_root_class;
	}
	if (pgcls != maxshareclass) {
		ckrm_change_page_class(page, maxshareclass);
	}
	return 1;
}

static int
ckrm_mem_evaluate_page(struct page *page)
{
	int ret = 0;

	BUG_ON(page->ckrm_zone == NULL);
	if (page->mapping) {
		if (PageAnon(page))
			ret = ckrm_mem_evaluate_page_anon(page);
		else
			ret = ckrm_mem_evaluate_page_file(page);
	}
	return ret;
}

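/*
 * Re-home every page currently charged to 'res' (used when the class
 * goes away): pages whose users cannot be determined fall back to
 * the root class.
 */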
static void
ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *res)
{
	struct page *page;
	struct ckrm_zone *ckrm_zone;
	struct zone *zone;
	struct list_head *pos, *next;
	int i;

	check_memclass(res, "bef_eval_all_pgs");
	for (i = 0; i < MAX_NR_ZONES; i++) {
		ckrm_zone = &res->ckrm_zone[i];
		zone = ckrm_zone->zone;
		spin_lock_irq(&zone->lru_lock);
		pos = ckrm_zone->inactive_list.next;
		while (pos != &ckrm_zone->inactive_list) {
			next = pos->next;
			page = list_entry(pos, struct page, lru);
			if (!ckrm_mem_evaluate_page(page))
				ckrm_change_page_class(page,
						ckrm_mem_root_class);
			pos = next;
		}
		pos = ckrm_zone->active_list.next;
		while (pos != &ckrm_zone->active_list) {
			next = pos->next;
			page = list_entry(pos, struct page, lru);
			if (!ckrm_mem_evaluate_page(page))
				ckrm_change_page_class(page,
						ckrm_mem_root_class);
			pos = next;
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	check_memclass(res, "aft_eval_all_pgs");
}

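/*
 * Classic three-level page table walk (2.6-era, pgd -> pmd -> pte):
 * every present page mapped by the mm is re-charged to mm->memclass.
 */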
static void
class_migrate_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
		pmd_t *pmdir, unsigned long address, unsigned long end)
{
	pte_t *pte;
	unsigned long pmd_end;

	if (pmd_none(*pmdir))
		return;
	BUG_ON(pmd_bad(*pmdir));

	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		pte = pte_offset_map(pmdir, address);
		if (pte_present(*pte)) {
			struct page *page = pte_page(*pte);

			BUG_ON(mm->memclass == NULL);
			if (page->mapping && page->ckrm_zone) {
				struct zone *zone = page->ckrm_zone->zone;

				spin_lock_irq(&zone->lru_lock);
				ckrm_change_page_class(page, mm->memclass);
				spin_unlock_irq(&zone->lru_lock);
			}
		}
		address += PAGE_SIZE;
		pte_unmap(pte);
	} while (address && (address < end));
}

static void
class_migrate_pgd(struct mm_struct *mm, struct vm_area_struct *vma,
		pgd_t *pgdir, unsigned long address, unsigned long end)
{
	pmd_t *pmd;
	unsigned long pgd_end;

	if (pgd_none(*pgdir))
		return;
	BUG_ON(pgd_bad(*pgdir));

	pmd = pmd_offset(pgdir, address);
	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;

	if (pgd_end && (end > pgd_end))
		end = pgd_end;

	do {
		class_migrate_pmd(mm, vma, pmd, address, end);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
}

static void
class_migrate_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	pgd_t *pgdir;
	unsigned long address, end;

	address = vma->vm_start;
	end = vma->vm_end;

	pgdir = pgd_offset(vma->vm_mm, address);
	do {
		class_migrate_pgd(mm, vma, pgdir, address, end);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		pgdir++;
	} while (address && (address < end));
}

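/*
 * Pick the "largest" class among all tasks sharing this mm and, if it
 * changed, swap the mm's class reference and migrate all mapped pages
 * to the new class.
 */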
/* this function is called with mm->peertask_lock held */
void
ckrm_mem_evaluate_mm(struct mm_struct *mm, struct ckrm_mem_res *def)
{
	struct task_struct *task;
	struct ckrm_mem_res *maxshareclass = def;
	struct vm_area_struct *vma;

	if (list_empty(&mm->tasklist)) {
		/* We leave the mm->memclass untouched since we believe that an
		 * mm with no task associated will be deleted soon or attached
		 * to another task later.
		 */
		return;
	}

	list_for_each_entry(task, &mm->tasklist, mm_peers) {
		struct ckrm_mem_res *cls = ckrm_get_mem_class(task);

		if (!maxshareclass ||
				ckrm_mem_share_compare(maxshareclass, cls) < 0)
			maxshareclass = cls;
	}

	if (maxshareclass && (mm->memclass != maxshareclass)) {
		if (mm->memclass)
			kref_put(&mm->memclass->nr_users, memclass_release);
		mm->memclass = maxshareclass;
		kref_get(&maxshareclass->nr_users);

		/* Go through all VMAs to migrate pages */
		down_read(&mm->mmap_sem);
		vma = mm->mmap;
		while (vma) {
			class_migrate_vma(mm, vma);
			vma = vma->vm_next;
		}
		up_read(&mm->mmap_sem);
	}
}

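/*
 * Attach a task to its mm's peer list and re-evaluate the mm's class
 * if the task's class and the mm's class disagree.
 */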
void
ckrm_init_mm_to_task(struct mm_struct *mm, struct task_struct *task)
{
	spin_lock(&mm->peertask_lock);
	if (!list_empty(&task->mm_peers)) {
		printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
		list_del_init(&task->mm_peers);
	}
	list_add_tail(&task->mm_peers, &mm->tasklist);
	spin_unlock(&mm->peertask_lock);
	if (mm->memclass != ckrm_get_mem_class(task))
		ckrm_mem_evaluate_mm(mm, NULL);
}

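/*
 * A class is valid while it is still linked on ckrm_memclass_list.
 */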
int
ckrm_memclass_valid(struct ckrm_mem_res *cls)
{
	struct ckrm_mem_res *tmp;
	unsigned long flags;

	if (!cls || list_empty(&cls->mcls_list)) {
		return 0;
	}
	spin_lock_irqsave(&ckrm_mem_lock, flags);
	list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
		if (tmp == cls) {
			spin_unlock_irqrestore(&ckrm_mem_lock, flags);
			return 1;
		}
	}
	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
	return 0;
}

MODULE_LICENSE("GPL");