1 /* ckrm_memcore.c - Memory Resource Manager for CKRM
3 * Copyright (C) Jiantao Kong, IBM Corp. 2003
4 * (C) Chandra Seetharaman, IBM Corp. 2004
6 * Provides a Memory Resource controller for CKRM
8 * Latest version, more details at http://ckrm.sf.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
17 #include <linux/module.h>
18 #include <linux/init.h>
19 #include <linux/slab.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/pagemap.h>
23 #include <linux/swap.h>
24 #include <linux/swapops.h>
25 #include <linux/cache.h>
26 #include <linux/percpu.h>
27 #include <linux/pagevec.h>
28 #include <linux/parser.h>
29 #include <linux/ckrm_mem_inline.h>
31 #include <asm/uaccess.h>
32 #include <asm/pgtable.h>
33 #include <asm/errno.h>
35 #define MEM_RES_NAME "mem"
37 #define CKRM_MEM_MAX_HIERARCHY 2 /* allows only upto 2 levels - 0, 1 & 2 */
/*
 * Module-wide controller state. All of these are exported because the
 * page-accounting fast path (ckrm_mem_inline.h) and the shrinker touch
 * them directly.
 */
39 /* all 1-level memory_share_class are chained together */
40 LIST_HEAD(ckrm_memclass_list);
41 spinlock_t ckrm_mem_lock; /* protects list above */
42 unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
/* count of live memory classes; updated in mem_res_alloc/mem_res_free */
43 int ckrm_nr_mem_classes = 0;
/* the single permitted root class; set once in mem_res_alloc(parent==NULL) */
44 struct ckrm_mem_res *ckrm_mem_root_class;
45 atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
47 EXPORT_SYMBOL_GPL(ckrm_memclass_list);
48 EXPORT_SYMBOL_GPL(ckrm_mem_lock);
49 EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
50 EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
51 EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
52 EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
/*
 * kref release callback for a memory class: runs when the last reference
 * (nr_users) is dropped via kref_put() in mem_res_free().
 * NOTE(review): the source here is truncated -- the return type, opening
 * brace and function body (presumably a kfree(cls)) are missing lines
 * 54/56/59-60 of the original file. Recover them before building.
 */
55 memclass_release(struct kref *kref)
57 	struct ckrm_mem_res *cls = container_of(kref,
58 			struct ckrm_mem_res, nr_users);
61 EXPORT_SYMBOL_GPL(memclass_release);
/*
 * Recompute ckrm_tot_lru_pages as the sum of active + inactive + free
 * pages across all zones. Called once at init time.
 * NOTE(review): truncated -- the return type, opening brace, the
 * "struct zone *zone;" declaration and the for_each_zone() loop header
 * are missing lines here; only the loop body and the final store remain.
 */
64 set_ckrm_tot_pages(void)
67 	int tot_lru_pages = 0;
70 		tot_lru_pages += zone->nr_active;
71 		tot_lru_pages += zone->nr_inactive;
72 		tot_lru_pages += zone->free_pages;
74 	ckrm_tot_lru_pages = tot_lru_pages;
77 /* Initialize rescls values
78 * May be called on each rcfs unmount or as part of error recovery
79 * to make share values sane.
80 * Does not traverse hierarchy reinitializing children.
/*
 * Reset one ckrm_mem_res to sane defaults: DONTCARE guarantees/limits,
 * empty LRU lists per zone, and an initial kref.
 * NOTE(review): truncated -- the opening brace, the zone/zindex local
 * declarations and the zindex++ increment inside for_each_zone() are
 * missing lines; the per-zone loop below relies on them.
 */
83 mem_res_initcls_one(struct ckrm_mem_res *res)
88 	memset(res, 0, sizeof(struct ckrm_mem_res));
/* shares start as "don't care" so the parent's unused share is spread */
90 	res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
91 	res->shares.my_limit = CKRM_SHARE_DONTCARE;
92 	res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
93 	res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
94 	res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
95 	res->shares.cur_max_limit = 0;
97 	res->pg_guar = CKRM_SHARE_DONTCARE;
98 	res->pg_limit = CKRM_SHARE_DONTCARE;
100 	INIT_LIST_HEAD(&res->mcls_list);
101 	INIT_LIST_HEAD(&res->shrink_list);
/* one ckrm_zone (active/inactive/victim LRU triple) per system zone */
103 	for_each_zone(zone) {
104 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
105 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
106 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
107 		res->ckrm_zone[zindex].nr_active = 0;
108 		res->ckrm_zone[zindex].nr_inactive = 0;
109 		res->ckrm_zone[zindex].zone = zone;
110 		res->ckrm_zone[zindex].memcls = res;
115 	res->nr_dontcare = 1; /* for default class */
116 	kref_init(&res->nr_users);
/*
 * Recompute the implicit guarantee handed to each "don't care" child of
 * parres: count the DONTCARE children (first walk), divide the parent's
 * spare pages evenly among them, then assign and recurse (second walk).
 * Runs under ckrm_lock_hier() to keep the child list stable.
 * NOTE(review): truncated -- the return type/opening brace, the
 * guar/impl_guar local declarations, the nr_dontcare++ in the first
 * loop and several closing braces are missing lines here.
 */
120 set_impl_guar_children(struct ckrm_mem_res *parres)
122 	struct ckrm_core_class *child = NULL;
123 	struct ckrm_mem_res *cres;
124 	int nr_dontcare = 1; // for defaultclass
126 	int resid = mem_rcbs.resid;
128 	ckrm_lock_hier(parres->core);
/* pass 1: count children whose guarantee is "don't care" */
129 	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
130 		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
131 		// treat NULL cres as don't care as that child is just being
133 		// FIXME: need a better way to handle this case.
134 		if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
139 	parres->nr_dontcare = nr_dontcare;
/* spare = parent's own implicit share if it is DONTCARE, else its unused pages */
140 	guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
141 			parres->impl_guar : parres->pg_unused;
142 	impl_guar = guar / parres->nr_dontcare;
/* pass 2: hand each DONTCARE child its slice and propagate downward */
144 	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
145 		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
146 		if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
147 			cres->impl_guar = impl_guar;
148 			set_impl_guar_children(cres);
151 	ckrm_unlock_hier(parres->core);
/*
 * res_alloc callback: allocate and initialize a memory class for `core`
 * under `parent`. Enforces a single root class and a maximum hierarchy
 * depth of CKRM_MEM_MAX_HIERARCHY. The root class is sized to the whole
 * system (ckrm_tot_lru_pages); a child inherits an implicit guarantee
 * carved out of its parent's spare pages.
 * NOTE(review): truncated -- the return type, opening brace, the
 * "return NULL" error paths after each printk, the guar local, the
 * if/else structure around the root-vs-child branch and the final
 * "return res" are missing lines here. The "hieararchy" typo in the
 * printk string is a runtime string; fix it in a code change, not here.
 */
156 mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
158 	struct ckrm_mem_res *res, *pres;
160 	BUG_ON(mem_rcbs.resid == -1);
162 	pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
/* refuse to deepen the hierarchy past the supported limit */
163 	if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
164 		printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n",
165 				CKRM_MEM_MAX_HIERARCHY);
/* only one root class may ever exist */
169 	if ((parent == NULL) && (ckrm_mem_root_class != NULL)) {
170 		printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
/* a child cannot precede its root */
174 	if ((parent != NULL) && (ckrm_mem_root_class == NULL)) {
175 		printk(KERN_ERR "MEM_RC: child class with no root class!!");
/* GFP_ATOMIC: this path may be called from non-sleepable context */
179 	res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
182 		mem_res_initcls_one(res);
184 		res->parent = parent;
185 		spin_lock(&ckrm_mem_lock);
186 		list_add(&res->mcls_list, &ckrm_memclass_list);
187 		spin_unlock(&ckrm_mem_lock);
188 		if (parent == NULL) {
189 			/* I am the root class. So, set the max to *
190 			 * number of pages available in the system */
191 			res->pg_guar = ckrm_tot_lru_pages;
192 			res->pg_unused = ckrm_tot_lru_pages;
193 			res->pg_limit = ckrm_tot_lru_pages;
195 			ckrm_mem_root_class = res;
/* child: one level deeper; recompute implicit guarantees for siblings */
198 			res->hier = pres->hier + 1;
199 			set_impl_guar_children(pres);
200 			guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
201 					pres->impl_guar : pres->pg_unused;
202 			res->impl_guar = guar / pres->nr_dontcare;
204 		ckrm_nr_mem_classes++;
206 		printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
211 * It is the caller's responsibility to make sure that the parent only
212 * has children that are to be accounted. i.e. if a new child is added
213 * this function should be called after it has been added, and if a
214 * child is deleted this should be called after the child is removed.
/*
 * Walk parres's children under the hierarchy lock and cache the largest
 * child my_limit into parres->shares.cur_max_limit.
 * NOTE(review): truncated -- the return type, opening brace and the
 * "int maxlimit = 0;" declaration are missing lines here. Also note the
 * loop dereferences childres without a NULL check, unlike
 * set_impl_guar_children() above -- worth confirming against the
 * original whether that was intentional.
 */
217 child_maxlimit_changed_local(struct ckrm_mem_res *parres)
220 	struct ckrm_mem_res *childres;
221 	struct ckrm_core_class *child = NULL;
223 	/* run thru parent's children and get new max_limit of parent */
224 	ckrm_lock_hier(parres->core);
225 	while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
226 		childres = ckrm_get_res_class(child, mem_rcbs.resid,
227 				struct ckrm_mem_res);
228 		if (maxlimit < childres->shares.my_limit) {
229 			maxlimit = childres->shares.my_limit;
232 	ckrm_unlock_hier(parres->core);
233 	parres->shares.cur_max_limit = maxlimit;
237 * Recalculate the guarantee and limit in # of pages... and propagate the
239 * Caller is responsible for protecting res and for the integrity of parres
/*
 * Convert the share ratios in res->shares into absolute page counts
 * (pg_guar / pg_limit / pg_unused) relative to parres, then recurse to
 * every child so the whole subtree is kept consistent. parres == NULL
 * means res has no parent contribution to scale by (top of the walk).
 * The 64-bit multiply + do_div avoids overflow of share * pages.
 * NOTE(review): truncated -- the return type/opening brace, the
 * "if (parres) {" guard around reading parres->shares, and the else
 * branches that store DONTCARE into pg_limit/pg_unused are missing
 * lines here.
 */
242 recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
244 	struct ckrm_core_class *child = NULL;
245 	struct ckrm_mem_res *cres;
246 	int resid = mem_rcbs.resid;
247 	struct ckrm_shares *self = &res->shares;
250 		struct ckrm_shares *par = &parres->shares;
252 		/* calculate pg_guar and pg_limit */
253 		if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
254 				self->my_guarantee == CKRM_SHARE_DONTCARE) {
255 			res->pg_guar = CKRM_SHARE_DONTCARE;
256 		} else if (par->total_guarantee) {
257 			u64 temp = (u64) self->my_guarantee * parres->pg_guar;
258 			do_div(temp, par->total_guarantee);
259 			res->pg_guar = (int) temp;
260 			res->impl_guar = CKRM_SHARE_DONTCARE;
263 			res->impl_guar = CKRM_SHARE_DONTCARE;
266 		if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
267 				self->my_limit == CKRM_SHARE_DONTCARE) {
268 			res->pg_limit = CKRM_SHARE_DONTCARE;
269 		} else if (par->max_limit) {
270 			u64 temp = (u64) self->my_limit * parres->pg_limit;
271 			do_div(temp, par->max_limit);
272 			res->pg_limit = (int) temp;
278 	/* Calculate unused units */
279 	if (res->pg_guar == CKRM_SHARE_DONTCARE) {
280 		res->pg_unused = CKRM_SHARE_DONTCARE;
281 	} else if (self->total_guarantee) {
282 		u64 temp = (u64) self->unused_guarantee * res->pg_guar;
283 		do_div(temp, self->total_guarantee);
284 		res->pg_unused = (int) temp;
289 	/* propagate to children */
290 	ckrm_lock_hier(res->core);
291 	while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
292 		cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
293 		recalc_and_propagate(cres, res);
295 	ckrm_unlock_hier(res->core);
/*
 * res_free callback: tear down a memory class. Migrates its pages back
 * to the root class, returns its guarantee to the parent and rebalances
 * the sibling shares, unlinks it from ckrm_memclass_list, and drops the
 * final kref (memclass_release frees the struct).
 * NOTE(review): truncated -- the return type/opening brace, the NULL
 * check on res, the "if (pres)" guard before touching parent shares,
 * part of the "Making it all zero" comment, and lines that likely reset
 * res->core / res->parent before the kref_put are missing here.
 */
300 mem_res_free(void *my_res)
302 	struct ckrm_mem_res *res = my_res;
303 	struct ckrm_mem_res *pres;
308 	ckrm_mem_migrate_all_pages(res, ckrm_mem_root_class);
310 	pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
311 			struct ckrm_mem_res);
/* give this class's guarantee back to the parent and rebalance siblings */
314 		child_guarantee_changed(&pres->shares,
315 				res->shares.my_guarantee, 0);
316 		child_maxlimit_changed_local(pres);
317 		recalc_and_propagate(pres, NULL);
318 		set_impl_guar_children(pres);
322 	 * Making it all zero as freeing of data structure could
325 	res->shares.my_guarantee = 0;
326 	res->shares.my_limit = 0;
331 	spin_lock(&ckrm_mem_lock);
332 	list_del_init(&res->mcls_list);
333 	spin_unlock(&ckrm_mem_lock);
/* last reference: memclass_release() frees res */
337 	kref_put(&res->nr_users, memclass_release);
338 	ckrm_nr_mem_classes--;
/*
 * set_share_values callback: validate and apply new share values via
 * the common set_shares() helper, then (on success, when a parent
 * exists) rebalance the parent's cached max limit, the absolute page
 * figures and the implicit guarantees of DONTCARE siblings.
 * NOTE(review): truncated -- the return type/opening brace, the "int rc"
 * declaration, the NULL check on res and the final "return rc" are
 * missing lines here.
 */
343 mem_set_share_values(void *my_res, struct ckrm_shares *shares)
345 	struct ckrm_mem_res *res = my_res;
346 	struct ckrm_mem_res *parres;
352 	parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
353 			struct ckrm_mem_res);
/* set_shares() validates against the parent's share pool when present */
355 	rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);
357 	if ((rc == 0) && (parres != NULL)) {
358 		child_maxlimit_changed_local(parres);
359 		recalc_and_propagate(parres, NULL);
360 		set_impl_guar_children(parres);
/*
 * get_share_values callback: copy the class's current shares to the
 * caller-supplied buffer (struct copy, no locking).
 * NOTE(review): truncated -- the return type/opening brace, the NULL
 * check on res and the return statement are missing lines here.
 */
367 mem_get_share_values(void *my_res, struct ckrm_shares *shares)
369 	struct ckrm_mem_res *res = my_res;
373 	printk(KERN_INFO "get_share called for %s resource of class %s\n",
374 				MEM_RES_NAME, res->core->name);
375 	*shares = res->shares;
/*
 * get_stats callback: print this class's page accounting to the rcfs
 * stats seq_file. For the root class, also dump the system-wide LRU
 * totals gathered zone by zone.
 * NOTE(review): truncated -- the return type/opening brace, the
 * "struct zone *zone" declaration, the NULL check on res, the closing
 * brace of the root-class branch and the value arguments of several
 * seq_printf calls (guarantee/limit/unused/lent/borrowed) are missing
 * lines here.
 */
380 mem_get_stats(void *my_res, struct seq_file *sfile)
382 	struct ckrm_mem_res *res = my_res;
384 	int active = 0, inactive = 0, fr = 0;
389 	seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
390 	if (res == ckrm_mem_root_class) {
392 		for_each_zone(zone) {
393 			active += zone->nr_active;
394 			inactive += zone->nr_inactive;
395 			fr += zone->free_pages;
398 		seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d"
399 				",free=%d\n", ckrm_tot_lru_pages,
400 				active, inactive, fr);
402 	seq_printf(sfile, "Number of pages used(including pages lent to"
403 			" children): %d\n", atomic_read(&res->pg_total));
404 	seq_printf(sfile, "Number of pages guaranteed: %d\n",
406 	seq_printf(sfile, "Maximum limit of pages: %d\n",
408 	seq_printf(sfile, "Total number of pages available"
409 			"(after serving guarantees to children): %d\n",
411 	seq_printf(sfile, "Number of pages lent to children: %d\n",
413 	seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
415 	seq_printf(sfile, "---------- Memory Resource stats end ----------\n");
/*
 * change_resclass callback: a task moved from class `old` to `new`.
 * Under mm->peertask_lock, detach the task from its mm's peer list,
 * re-append it, and then migrate the mm's pages to the new class.
 * old == (void *)-1 is treated as "no-op" (task exiting / unknown).
 * NOTE(review): heavily truncated -- the return type/opening brace,
 * early "return", and the conditional logic between lines 432-446
 * (which decided whether the peer-list manipulation and migration are
 * needed, using prev_mmcls) are missing lines here; as shown,
 * prev_mmcls is read but apparently unused and task->mm_peers is
 * deleted twice. Recover the original before reasoning about this.
 */
421 mem_change_resclass(void *tsk, void *old, void *new)
423 	struct mm_struct *mm;
424 	struct task_struct *task = tsk, *t1;
425 	struct ckrm_mem_res *prev_mmcls;
427 	if (!task->mm || (new == old) || (old == (void *) -1))
430 	mm = task->active_mm;
431 	spin_lock(&mm->peertask_lock);
432 	prev_mmcls = mm->memclass;
435 		list_del_init(&task->mm_peers);
438 		list_for_each_entry(t1, &mm->tasklist, mm_peers) {
445 	list_del_init(&task->mm_peers);
446 	list_add_tail(&task->mm_peers, &mm->tasklist);
450 	spin_unlock(&mm->peertask_lock);
451 	ckrm_mem_migrate_mm(mm, (struct ckrm_mem_res *) new);
/*
 * Config keys (as written in rcfs "config" files) and their backing
 * tunables. Values are percentages of a class's limit except the count
 * and interval. Exported for use by the shrinker / fast path.
 */
455 #define MEM_FAIL_OVER "fail_over"
456 #define MEM_SHRINK_AT "shrink_at"
457 #define MEM_SHRINK_TO "shrink_to"
458 #define MEM_SHRINK_COUNT "num_shrinks"
459 #define MEM_SHRINK_INTERVAL "shrink_interval"
461 int ckrm_mem_fail_at = 110;
462 int ckrm_mem_shrink_at = 90;
463 int ckrm_mem_shrink_to = 80;
464 int ckrm_mem_shrink_count = 10;
465 int ckrm_mem_shrink_interval = 10;
467 EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
468 EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
469 EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
/*
 * show_config callback: emit all tunables as one "res=mem,key=val,..."
 * line, the inverse of the format parsed by mem_set_config().
 * NOTE(review): truncated -- the return type/opening brace, the NULL
 * check on res, the first seq_printf argument after the format string
 * (presumably MEM_RES_NAME) and the return are missing lines here.
 */
472 mem_show_config(void *my_res, struct seq_file *sfile)
474 	struct ckrm_mem_res *res = my_res;
479 	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
481 			MEM_FAIL_OVER, ckrm_mem_fail_at,
482 			MEM_SHRINK_AT, ckrm_mem_shrink_at,
483 			MEM_SHRINK_TO, ckrm_mem_shrink_to,
484 			MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
485 			MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
/*
 * Token ids and lib/parser match table for mem_set_config(). Each
 * pattern is "<key>=%d"; match_token() returns the id, match_int()
 * extracts the value. mem_err is the catch-all for unknown keys.
 * NOTE(review): truncated -- the closing "};" of the enum and the
 * "{mem_err, NULL}" terminator + closing brace of the table appear to
 * be among the missing lines.
 */
490 typedef int __bitwise memclass_token_t;
492 enum memclass_token {
493 	mem_fail_over = (__force memclass_token_t) 1,
494 	mem_shrink_at = (__force memclass_token_t) 2,
495 	mem_shrink_to = (__force memclass_token_t) 3,
496 	mem_shrink_count = (__force memclass_token_t) 4,
497 	mem_shrink_interval = (__force memclass_token_t) 5,
498 	mem_err = (__force memclass_token_t) 6
501 static match_table_t mem_tokens = {
502 	{mem_fail_over, MEM_FAIL_OVER "=%d"},
503 	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
504 	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
505 	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
506 	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
/*
 * set_config callback: parse a comma-separated "key=value" string and
 * update the corresponding tunables. Values must be positive, except
 * shrink_to which is validated as a 0..100 percentage.
 * NOTE(review): heavily truncated -- the return type/opening brace,
 * locals (p, token, val), the NULL check on res, the "switch (token) {"
 * header, the first three "case ...:" labels, the error returns inside
 * each validation branch, every "break;", the default case and the
 * final return are all missing lines here.
 */
511 mem_set_config(void *my_res, const char *cfgstr)
514 	struct ckrm_mem_res *res = my_res;
/* strsep() mutates the string in place as it splits on ',' */
520 	while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
521 		substring_t args[MAX_OPT_ARGS];
526 		token = match_token(p, mem_tokens, args);
529 			if (match_int(args, &val) || (val <= 0)) {
532 			ckrm_mem_fail_at = val;
536 			if (match_int(args, &val) || (val <= 0)) {
539 			ckrm_mem_shrink_at = val;
543 			if (match_int(args, &val) || (val < 0) || (val > 100)) {
546 			ckrm_mem_shrink_to = val;
549 		case mem_shrink_count:
550 			if (match_int(args, &val) || (val <= 0)) {
553 			ckrm_mem_shrink_count = val;
556 		case mem_shrink_interval:
557 			if (match_int(args, &val) || (val <= 0)) {
560 			ckrm_mem_shrink_interval = val;
/*
 * reset_stats callback: currently only logs the request; no counters
 * are actually cleared in the visible code.
 * NOTE(review): truncated -- the return type/opening brace, the printk's
 * class-name argument and the return are missing lines here.
 */
571 mem_reset_stats(void *my_res)
573 	struct ckrm_mem_res *res = my_res;
574 	printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
/*
 * Controller descriptor registered with the "taskclass" classtype in
 * init_ckrm_mem_res(). Wires every rcfs operation to the callbacks
 * defined above.
 * NOTE(review): truncated -- the closing "};" of the initializer is
 * among the missing lines.
 */
579 struct ckrm_res_ctlr mem_rcbs = {
580 	.res_name = MEM_RES_NAME,
581 	.res_hdepth = CKRM_MEM_MAX_HIERARCHY,
583 	.res_alloc = mem_res_alloc,
584 	.res_free = mem_res_free,
585 	.set_share_values = mem_set_share_values,
586 	.get_share_values = mem_get_share_values,
587 	.get_stats = mem_get_stats,
588 	.change_resclass = mem_change_resclass,
589 	.show_config = mem_show_config,
590 	.set_config = mem_set_config,
591 	.reset_stats = mem_reset_stats,
594 EXPORT_SYMBOL_GPL(mem_rcbs);
/*
 * Module init: snapshot the system page totals, initialize the class
 * list lock, and register this controller with the "taskclass"
 * classtype. Returns 0 on success or the negative registration error.
 * NOTE(review): truncated -- the "__init int" (or similar) return type,
 * opening brace, the error return after the "Unknown ckrm classtype"
 * printk, and the guard around the registration block (note `resid` is
 * read from mem_rcbs.resid before being overwritten, suggesting a
 * missing "if (resid == -1)" check) are missing lines here.
 */
597 init_ckrm_mem_res(void)
599 	struct ckrm_classtype *clstype;
600 	int resid = mem_rcbs.resid;
602 	set_ckrm_tot_pages();
603 	spin_lock_init(&ckrm_mem_lock);
604 	clstype = ckrm_find_classtype_by_name("taskclass");
605 	if (clstype == NULL) {
606 		printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
611 	resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
613 		mem_rcbs.classtype = clstype;
616 	return ((resid < 0) ? resid : 0);
/*
 * Module exit: unregister the controller from its classtype.
 * NOTE(review): truncated -- the "__exit void" (or similar) return type,
 * opening brace and closing brace are missing lines here.
 */
620 exit_ckrm_mem_res(void)
622 	ckrm_unregister_res_ctlr(&mem_rcbs);
626 module_init(init_ckrm_mem_res)
627 module_exit(exit_ckrm_mem_res)
628 MODULE_LICENSE("GPL");