- freeing pages from the inactive list (shrink_zone)
depending on the recent usage of the page(approximately).
+During its life cycle a page can move from the lru list to swap
+and back. For this document's purpose, we treat that the same as freeing
+and allocating the page, respectively.
+
1. Introduction
---------------
Memory resource controller controls the number of lru physical pages
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab
-# Wed Mar 2 15:48:12 2005
+# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.03.31
+# Thu Mar 31 11:50:25 2005
#
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_RCFS_FS=y
CONFIG_CKRM_TYPE_TASKCLASS=y
CONFIG_CKRM_RES_NULL=m
+CONFIG_CKRM_RES_MEM=y
+# CONFIG_CKRM_TYPE_SOCKETCLASS is not set
CONFIG_CKRM_RES_NUMTASKS=y
CONFIG_CKRM_CPU_SCHEDULE=y
# CONFIG_CKRM_RES_BLKIO is not set
-# CONFIG_CKRM_RES_MEM is not set
CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT=y
-# CONFIG_CKRM_TYPE_SOCKETCLASS is not set
CONFIG_CKRM_RBCE=y
+# CONFIG_CKRM_CRBCE is not set
CONFIG_SYSCTL=y
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
activate_mm(active_mm, mm);
task_unlock(tsk);
arch_pick_mmap_layout(mm);
- ckrm_task_change_mm(tsk, old_mm, mm);
+ ckrm_task_mm_change(tsk, old_mm, mm);
if (old_mm) {
if (active_mm != old_mm) BUG();
mmput(old_mm);
struct list_head active_list;
struct list_head inactive_list;
- unsigned long nr_active; // # of pages in the active list
- unsigned long nr_inactive; // # of pages in the inactive list
+ unsigned long nr_active;
+ unsigned long nr_inactive;
unsigned long active_over;
unsigned long inactive_over;
unsigned long shrink_inactive;
long shrink_weight;
unsigned long shrink_flag;
-
- struct list_head victim_list; // list of ckrm_zones chosen for shrinking
+ struct list_head victim_list; /* list of ckrm_zones chosen for
+ * shrinking. These are over their
+ * 'guarantee'
+ */
struct zone *zone;
struct ckrm_mem_res *memcls;
};
struct ckrm_mem_res {
unsigned long flags;
- struct ckrm_core_class *core; // the core i am part of...
- struct ckrm_core_class *parent; // parent of the core i am part of....
- struct ckrm_shares shares;
- struct list_head mcls_list; // list of all 1-level classes
- struct list_head shrink_list; // list of classes need to be shrunk
- struct kref nr_users; // # of references to this class/data structure
- atomic_t pg_total; // # of pages used by this class
- int pg_guar; // # of pages this class is guaranteed
- int pg_limit; // max # of pages this class can get
- int pg_borrowed; // # of pages this class borrowed from its parent
- int pg_lent; // # of pages this class lent to its children
- int pg_unused; // # of pages left to this class (after giving the
- // guarantees to children. need to borrow from parent if
- // more than this is needed.
- int impl_guar; // implicit guarantee for class with don't care guar
- int nr_dontcare; // # of children with don't care guarantee
+ struct ckrm_core_class *core; /* the core i am part of... */
+ struct ckrm_core_class *parent; /* parent of the core i am part of */
+ struct ckrm_shares shares;
+ struct list_head mcls_list; /* list of all 1-level classes */
+ struct kref nr_users; /* ref count */
+ atomic_t pg_total; /* # of pages used by this class */
+ int pg_guar; /* absolute # of guarantee */
+ int pg_limit; /* absolute # of limit */
+ int pg_borrowed; /* # of pages borrowed from parent */
+ int pg_lent; /* # of pages lent to children */
+ int pg_unused; /* # of pages left to this class
+ * (after giving the guarantees to
+ * children. need to borrow from
+ * parent if more than this is needed.
+ */
+ int hier; /* hierarchy level, root = 0 */
+ int impl_guar; /* for classes with don't care guar */
+ int nr_dontcare; /* # of dont care children */
+
struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+ struct list_head shrink_list; /* list of classes that are near
+ * limit and need to be shrunk
+ */
int shrink_count;
unsigned long last_shrink;
- int over_limit_failures;
- int shrink_pages; // # of pages to free in this class
- int hier; // hiearchy, root = 0
};
+#define CLS_SHRINK_BIT (1)
+
+#define CLS_AT_LIMIT (1)
+
extern atomic_t ckrm_mem_real_count;
-extern unsigned int ckrm_tot_lru_pages;
-extern int ckrm_nr_mem_classes;
-extern struct list_head ckrm_shrink_list;
-extern struct list_head ckrm_memclass_list;
-extern spinlock_t ckrm_mem_lock;
extern struct ckrm_res_ctlr mem_rcbs;
extern struct ckrm_mem_res *ckrm_mem_root_class;
+extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
+extern spinlock_t ckrm_mem_lock;
+extern int ckrm_nr_mem_classes;
+extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval ;
-#define page_ckrmzone(page) ((page)->ckrm_zone)
-
-#define CLS_SHRINK_BIT (1)
-
-// used in flags. set when a class is more than 90% of its maxlimit
-#define MEM_AT_LIMIT 1
-
-extern void ckrm_init_mm_to_task(struct mm_struct *, struct task_struct *);
-extern void ckrm_mem_evaluate_mm(struct mm_struct *, struct ckrm_mem_res *);
-extern void ckrm_at_limit(struct ckrm_mem_res *);
-extern int ckrm_memclass_valid(struct ckrm_mem_res *);
-extern int ckrm_mem_get_shrink_to(void);
-extern void check_memclass(struct ckrm_mem_res *, char *);
+extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
+extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
+ struct ckrm_mem_res *);
extern void memclass_release(struct kref *);
-
+extern void shrink_get_victims(struct zone *, unsigned long ,
+ unsigned long, struct list_head *);
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
#else
-#define ckrm_init_mm_to_current(a) do {} while (0)
-#define ckrm_mem_evaluate_mm(a) do {} while (0)
-#define ckrm_init_mm_to_task(a,b) do {} while (0)
+#define ckrm_mem_migrate_mm(a, b) do {} while (0)
+#define ckrm_mem_migrate_all_pages(a, b) do {} while (0)
-#endif // CONFIG_CKRM_RES_MEM
+#endif /* CONFIG_CKRM_RES_MEM */
-#endif //_LINUX_CKRM_MEM_H
+#endif /* _LINUX_CKRM_MEM_H */
#ifdef CONFIG_CKRM_RES_MEM
-#define INACTIVE 0
-#define ACTIVE 1
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
static inline struct ckrm_mem_res *
ckrm_get_mem_class(struct task_struct *tsk)
struct ckrm_mem_res);
}
-#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
-
static inline void
ckrm_set_shrink(struct ckrm_zone *cz)
{
clear_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
}
+static inline void
+set_page_ckrmzone( struct page *page, struct ckrm_zone *cz)
+{
+ page->ckrm_zone = cz;
+}
+
+static inline struct ckrm_zone *
+page_ckrmzone(struct page *page)
+{
+ return page->ckrm_zone;
+}
+
/*
* Currently, a shared page that is shared by multiple classes is charged
* to a class with max available guarantee. Simply replace this function
if (a == NULL)
return -(b != NULL);
if (b == NULL)
- return 0;
+ return 1;
if (a->pg_guar == b->pg_guar)
return 0;
if (a->pg_guar == CKRM_SHARE_DONTCARE)
incr_use_count(struct ckrm_mem_res *cls, int borrow)
{
extern int ckrm_mem_shrink_at;
- if (unlikely(!cls))
+ struct ckrm_mem_res *parcls;
+
+ /* cls may be NULL: bail out before anything dereferences it */
+ if (!cls)
return;
- BUG_ON(!ckrm_memclass_valid(cls));
- atomic_inc(&cls->pg_total);
+ atomic_inc(&cls->pg_total);
if (borrow)
cls->pg_lent++;
- if ((cls->pg_guar == CKRM_SHARE_DONTCARE) ||
- (atomic_read(&cls->pg_total) > cls->pg_unused)) {
- struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
+
+ /*
+ * Charge the parent (as a lent page) when this class has a don't-care
+ * guarantee or already uses more than pg_unused; otherwise the page
+ * is counted in ckrm_mem_real_count.
+ */
+ parcls = ckrm_get_res_class(cls->parent,
mem_rcbs.resid, struct ckrm_mem_res);
- if (parcls) {
- incr_use_count(parcls, 1);
- cls->pg_borrowed++;
- }
- } else {
+ if (parcls && ((cls->pg_guar == CKRM_SHARE_DONTCARE) ||
+ (atomic_read(&cls->pg_total) > cls->pg_unused))) {
+ incr_use_count(parcls, 1);
+ cls->pg_borrowed++;
+ } else
atomic_inc(&ckrm_mem_real_count);
- }
- if (unlikely((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+
+ /*
+ * Nudge the shrinker once usage reaches ckrm_mem_shrink_at percent of
+ * pg_limit, unless CLS_AT_LIMIT is already set in flags.
+ */
+ if ((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
(atomic_read(&cls->pg_total) >=
((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
- ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT))) {
- ckrm_at_limit(cls);
+ ((cls->flags & CLS_AT_LIMIT) != CLS_AT_LIMIT)) {
+ ckrm_shrink_atlimit(cls);
}
return;
}
static inline void
decr_use_count(struct ckrm_mem_res *cls, int borrowed)
{
- if (unlikely(!cls))
+ if (!cls)
return;
- BUG_ON(!ckrm_memclass_valid(cls));
atomic_dec(&cls->pg_total);
if (borrowed)
cls->pg_lent--;
static inline void
ckrm_set_page_class(struct page *page, struct ckrm_mem_res *cls)
{
- if (unlikely(cls == NULL)) {
- cls = ckrm_mem_root_class;
- }
- if (likely(cls != NULL)) {
- struct ckrm_zone *czone = &cls->ckrm_zone[page_zonenum(page)];
- if (unlikely(page->ckrm_zone)) {
- kref_put(&cls->nr_users, memclass_release);
- }
- page->ckrm_zone = czone;
- kref_get(&cls->nr_users);
- } else {
- page->ckrm_zone = NULL;
- }
-}
+ struct ckrm_zone *new_czone, *old_czone;
-static inline void
-ckrm_set_pages_class(struct page *pages, int numpages, struct ckrm_mem_res *cls)
-{
- int i;
- for (i = 0; i < numpages; pages++, i++) {
- ckrm_set_page_class(pages, cls);
- }
-}
-
-static inline void
-ckrm_clear_page_class(struct page *page)
-{
- if (likely(page->ckrm_zone != NULL)) {
- if (CkrmAccount(page)) {
- decr_use_count(page->ckrm_zone->memcls, 0);
- ClearCkrmAccount(page);
+ if (!cls) {
+ if (!ckrm_mem_root_class) {
+ set_page_ckrmzone(page, NULL);
+ return;
}
- kref_put(&page->ckrm_zone->memcls->nr_users, memclass_release);
- page->ckrm_zone = NULL;
+ cls = ckrm_mem_root_class;
}
+ new_czone = &cls->ckrm_zone[page_zonenum(page)];
+ old_czone = page_ckrmzone(page);
+
+ if (old_czone)
+ kref_put(&old_czone->memcls->nr_users, memclass_release);
+
+ set_page_ckrmzone(page, new_czone);
+ kref_get(&cls->nr_users);
+ incr_use_count(cls, 0);
+ SetPageCkrmAccount(page);
}
static inline void
ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls)
{
- struct ckrm_zone *old_czone = page->ckrm_zone, *new_czone;
+ struct ckrm_zone *old_czone = page_ckrmzone(page), *new_czone;
struct ckrm_mem_res *oldcls;
- if (unlikely(!old_czone || !newcls)) {
- BUG_ON(CkrmAccount(page));
- return;
+ /* a NULL newcls means "use the root class", if one exists */
+ if (!newcls) {
+ if (!ckrm_mem_root_class)
+ return;
+ newcls = ckrm_mem_root_class;
}
- BUG_ON(!CkrmAccount(page));
+ /*
+ * A page with no ckrm_zone has no class to move away from; return
+ * before old_czone is dereferenced (same guard as the old code).
+ */
+ if (!old_czone)
+ return;
oldcls = old_czone->memcls;
- if (oldcls == NULL || (oldcls == newcls))
+ if (oldcls == newcls)
return;
- kref_put(&oldcls->nr_users, memclass_release);
- decr_use_count(oldcls, 0);
-
- page->ckrm_zone = new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+ /* drop the reference and the usage charge held against the old class */
+ if (oldcls) {
+ kref_put(&oldcls->nr_users, memclass_release);
+ decr_use_count(oldcls, 0);
+ }
+ /* re-home the page in the new class's per-zone structure and charge it */
+ new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+ set_page_ckrmzone(page, new_czone);
kref_get(&newcls->nr_users);
incr_use_count(newcls, 0);
}
}
+static inline void
+ckrm_clear_page_class(struct page *page)
+{
+ struct ckrm_zone *czone = page_ckrmzone(page);
+ if (czone != NULL) {
+ if (PageCkrmAccount(page)) {
+ decr_use_count(czone->memcls, 0);
+ ClearPageCkrmAccount(page);
+ }
+ kref_put(&czone->memcls->nr_users, memclass_release);
+ set_page_ckrmzone(page, NULL);
+ }
+}
+
static inline void
ckrm_mem_inc_active(struct page *page)
{
- struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
+ struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
+ ?: ckrm_mem_root_class;
+ struct ckrm_zone *czone;
if (cls == NULL)
return;
- BUG_ON(CkrmAccount(page));
- BUG_ON(page->ckrm_zone != NULL);
ckrm_set_page_class(page, cls);
- incr_use_count(cls, 0);
- SetCkrmAccount(page);
- BUG_ON(page->ckrm_zone == NULL);
- page->ckrm_zone->nr_active++;
- list_add(&page->lru, &page->ckrm_zone->active_list);
+ czone = page_ckrmzone(page);
+ czone->nr_active++;
+ list_add(&page->lru, &czone->active_list);
}
static inline void
ckrm_mem_dec_active(struct page *page)
{
- if (page->ckrm_zone == NULL)
+ struct ckrm_zone *czone = page_ckrmzone(page);
+ if (czone == NULL)
return;
- BUG_ON(page->ckrm_zone->memcls == NULL);
- BUG_ON(!CkrmAccount(page));
list_del(&page->lru);
- page->ckrm_zone->nr_active--;
+ czone->nr_active--;
ckrm_clear_page_class(page);
}
static inline void
ckrm_mem_inc_inactive(struct page *page)
{
- struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
+ struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
+ ?: ckrm_mem_root_class;
+ struct ckrm_zone *czone;
if (cls == NULL)
return;
- BUG_ON(CkrmAccount(page));
- BUG_ON(page->ckrm_zone != NULL);
ckrm_set_page_class(page, cls);
- incr_use_count(cls, 0);
- SetCkrmAccount(page);
- BUG_ON(page->ckrm_zone == NULL);
- page->ckrm_zone->nr_inactive++;
- list_add(&page->lru, &page->ckrm_zone->inactive_list);
+ czone = page_ckrmzone(page);
+ czone->nr_inactive++;
+ list_add(&page->lru, &czone->inactive_list);
}
static inline void
ckrm_mem_dec_inactive(struct page *page)
{
- if (page->ckrm_zone == NULL)
+ struct ckrm_zone *czone = page_ckrmzone(page);
+ if (czone == NULL)
return;
- BUG_ON(page->ckrm_zone->memcls == NULL);
- BUG_ON(!CkrmAccount(page));
- page->ckrm_zone->nr_inactive--;
+ czone->nr_inactive--;
list_del(&page->lru);
ckrm_clear_page_class(page);
}
+static inline void
+ckrm_zone_add_active(struct ckrm_zone *czone, int cnt)
+{
+ czone->nr_active += cnt;
+}
+
+static inline void
+ckrm_zone_add_inactive(struct ckrm_zone *czone, int cnt)
+{
+ czone->nr_inactive += cnt;
+}
+
+static inline void
+ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt)
+{
+ czone->nr_active -= cnt;
+}
+
+static inline void
+ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt)
+{
+ czone->nr_inactive -= cnt;
+}
+
static inline int
ckrm_class_limit_ok(struct ckrm_mem_res *cls)
{
int ret;
- extern int ckrm_mem_fail_over;
if ((mem_rcbs.resid == -1) || !cls) {
return 1;
struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
mem_rcbs.resid, struct ckrm_mem_res);
ret = (parcls ? ckrm_class_limit_ok(parcls) : 0);
- } else {
- ret = (atomic_read(&cls->pg_total) <=
- ((ckrm_mem_fail_over * cls->pg_limit) / 100));
- }
+ } else
+ ret = (atomic_read(&cls->pg_total) <= cls->pg_limit);
+
+ /* If we are failing, just nudge the back end */
+ if (ret == 0)
+ ckrm_shrink_atlimit(cls);
- if (ret == 0) {
- // if we are failing... just nudge the back end
- ckrm_at_limit(cls);
- }
return ret;
}
-// task/mm initializations/cleanup
+static inline void
+ckrm_page_init(struct page *page)
+{
+ page->flags &= ~(1 << PG_ckrm_account);
+ set_page_ckrmzone(page, NULL);
+}
+
+
+/* task/mm initializations/cleanup */
static inline void
ckrm_task_mm_init(struct task_struct *tsk)
}
static inline void
-ckrm_task_change_mm(struct task_struct *tsk, struct mm_struct *oldmm, struct mm_struct *newmm)
+ckrm_task_mm_set(struct mm_struct * mm, struct task_struct *task)
+{
+ spin_lock(&mm->peertask_lock);
+ if (!list_empty(&task->mm_peers)) {
+ printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
+ list_del_init(&task->mm_peers);
+ }
+ list_add_tail(&task->mm_peers, &mm->tasklist);
+ spin_unlock(&mm->peertask_lock);
+ if (mm->memclass != ckrm_get_mem_class(task))
+ ckrm_mem_migrate_mm(mm, NULL);
+ return;
+}
+
+static inline void
+ckrm_task_mm_change(struct task_struct *tsk,
+ struct mm_struct *oldmm, struct mm_struct *newmm)
{
if (oldmm) {
spin_lock(&oldmm->peertask_lock);
list_del(&tsk->mm_peers);
- ckrm_mem_evaluate_mm(oldmm, NULL);
+ ckrm_mem_migrate_mm(oldmm, NULL);
spin_unlock(&oldmm->peertask_lock);
}
spin_lock(&newmm->peertask_lock);
list_add_tail(&tsk->mm_peers, &newmm->tasklist);
- ckrm_mem_evaluate_mm(newmm, NULL);
+ ckrm_mem_migrate_mm(newmm, NULL);
spin_unlock(&newmm->peertask_lock);
}
static inline void
-ckrm_task_clear_mm(struct task_struct *tsk, struct mm_struct *mm)
+ckrm_task_mm_clear(struct task_struct *tsk, struct mm_struct *mm)
{
spin_lock(&mm->peertask_lock);
list_del_init(&tsk->mm_peers);
- ckrm_mem_evaluate_mm(mm, NULL);
+ ckrm_mem_migrate_mm(mm, NULL);
spin_unlock(&mm->peertask_lock);
}
}
}
-static inline void
-ckrm_zone_inc_active(struct ckrm_zone *czone, int cnt)
+static inline void ckrm_init_lists(struct zone *zone) {}
+
+static inline void ckrm_add_tail_inactive(struct page *page)
{
- czone->nr_active += cnt;
+ struct ckrm_zone *ckrm_zone = page_ckrmzone(page);
+ list_add_tail(&page->lru, &ckrm_zone->inactive_list);
}
-static inline void
-ckrm_zone_inc_inactive(struct ckrm_zone *czone, int cnt)
+#else
+
+#define ckrm_shrink_list_empty() (1)
+
+static inline void *
+ckrm_get_memclass(struct task_struct *tsk)
{
- czone->nr_inactive += cnt;
+ return NULL;
}
-static inline void
-ckrm_zone_dec_active(struct ckrm_zone *czone, int cnt)
+static inline void ckrm_clear_page_class(struct page *p) {}
+
+static inline void ckrm_mem_inc_active(struct page *p) {}
+static inline void ckrm_mem_dec_active(struct page *p) {}
+static inline void ckrm_mem_inc_inactive(struct page *p) {}
+static inline void ckrm_mem_dec_inactive(struct page *p) {}
+
+#define ckrm_zone_add_active(a, b) do {} while (0)
+#define ckrm_zone_add_inactive(a, b) do {} while (0)
+#define ckrm_zone_sub_active(a, b) do {} while (0)
+#define ckrm_zone_sub_inactive(a, b) do {} while (0)
+
+#define ckrm_class_limit_ok(a) (1)
+
+static inline void ckrm_page_init(struct page *p) {}
+static inline void ckrm_task_mm_init(struct task_struct *tsk) {}
+static inline void ckrm_task_mm_set(struct mm_struct * mm,
+ struct task_struct *task) {}
+static inline void ckrm_task_mm_change(struct task_struct *tsk,
+ struct mm_struct *oldmm, struct mm_struct *newmm) {}
+static inline void ckrm_task_mm_clear(struct task_struct *tsk,
+ struct mm_struct *mm) {}
+
+static inline void ckrm_mm_init(struct mm_struct *mm) {}
+
+/* using #define instead of static inline as the prototype requires *
+ * data structures that are available only with the controller enabled */
+#define ckrm_mm_setclass(a, b) do {} while(0)
+
+static inline void ckrm_mm_clearclass(struct mm_struct *mm) {}
+
+static inline void ckrm_init_lists(struct zone *zone)
{
- czone->nr_active -= cnt;
+ INIT_LIST_HEAD(&zone->active_list);
+ INIT_LIST_HEAD(&zone->inactive_list);
}
-static inline void
-ckrm_zone_dec_inactive(struct ckrm_zone *czone, int cnt)
+static inline void ckrm_add_tail_inactive(struct page *page)
{
- czone->nr_inactive -= cnt;
+ struct zone *zone = page_zone(page);
+ list_add_tail(&page->lru, &zone->inactive_list);
}
-
-#else // !CONFIG_CKRM_RES_MEM
-
-#define ckrm_set_page_class(a,b) do{}while(0)
-#define ckrm_set_pages_class(a,b,c) do{}while(0)
-#define ckrm_clear_page_class(a) do{}while(0)
-#define ckrm_clear_pages_class(a,b) do{}while(0)
-#define ckrm_change_page_class(a,b) do{}while(0)
-#define ckrm_change_pages_class(a,b,c) do{}while(0)
-#define ckrm_mem_inc_active(a) do{}while(0)
-#define ckrm_mem_dec_active(a) do{}while(0)
-#define ckrm_mem_inc_inactive(a) do{}while(0)
-#define ckrm_mem_dec_inactive(a) do{}while(0)
-#define ckrm_shrink_list_empty() (1)
-#define ckrm_kick_page(a,b) (0)
-#define ckrm_class_limit_ok(a) (1)
-#define ckrm_task_mm_init(a) do{}while(0)
-#define ckrm_task_clear_mm(a, b) do{}while(0)
-#define ckrm_task_change_mm(a, b, c) do{}while(0)
-#define ckrm_mm_init(a) do{}while(0)
-#define ckrm_mm_setclass(a, b) do{}while(0)
-#define ckrm_mm_clearclass(a) do{}while(0)
-#define ckrm_zone_inc_active(a, b) do{}while(0)
-#define ckrm_zone_inc_inactive(a, b) do{}while(0)
-#define ckrm_zone_dec_active(a, b) do{}while(0)
-#define ckrm_zone_dec_inactive(a, b) do{}while(0)
-
-#endif // CONFIG_CKRM_RES_MEM
-
-#endif // _LINUX_CKRM_MEM_INLINE_H_
+#endif
+#endif /* _LINUX_CKRM_MEM_INLINE_H_ */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_CKRM_RES_MEM
struct ckrm_zone *ckrm_zone;
-#endif // CONFIG_CKRM_RES_MEM
+#endif
};
/*
#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
#define PG_reclaim 18 /* To be reclaimed asap */
-#ifdef CONFIG_CKRM_RES_MEM
-#define PG_ckrm_account 19 /* This page is accounted by CKRM */
-#endif
-
+#define PG_ckrm_account 20 /* CKRM accounting */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
#endif
#ifdef CONFIG_CKRM_RES_MEM
-#define CkrmAccount(page) test_bit(PG_ckrm_account, &(page)->flags)
-#define SetCkrmAccount(page) set_bit(PG_ckrm_account, &(page)->flags)
-#define ClearCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags)
+#define PageCkrmAccount(page) test_bit(PG_ckrm_account, &(page)->flags)
+#define SetPageCkrmAccount(page) set_bit(PG_ckrm_account, &(page)->flags)
+#define ClearPageCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags)
#endif
struct page; /* forward declaration */
#include <linux/percpu.h>
#include <linux/topology.h>
#include <linux/vs_base.h>
+#include <linux/taskdelays.h>
struct exec_domain;
extern int exec_shield;
struct kioctx default_kioctx;
#ifdef CONFIG_CKRM_RES_MEM
struct ckrm_mem_res *memclass;
- struct list_head tasklist; /* list of all tasks sharing this address space */
- spinlock_t peertask_lock; /* protect above tasklist */
+ struct list_head tasklist; /* tasks sharing this address space */
+ spinlock_t peertask_lock; /* protect tasklist above */
#endif
};
struct mempolicy *mempolicy;
short il_next; /* could be shared with used_math */
#endif
-
#ifdef CONFIG_CKRM
- spinlock_t ckrm_tsklock;
+ spinlock_t ckrm_tsklock;
void *ce_data;
#ifdef CONFIG_CKRM_TYPE_TASKCLASS
- // .. Hubertus should change to CONFIG_CKRM_TYPE_TASKCLASS
struct ckrm_task_class *taskclass;
- struct list_head taskclass_link;
+ struct list_head taskclass_link;
#ifdef CONFIG_CKRM_CPU_SCHEDULE
struct ckrm_cpu_class *cpu_class;
- //track cpu demand of this task
+ /* track cpu demand of this task */
struct ckrm_cpu_demand_stat demand_stat;
-#endif //CONFIG_CKRM_CPU_SCHEDULE
-#endif // CONFIG_CKRM_TYPE_TASKCLASS
+#endif /* CONFIG_CKRM_CPU_SCHEDULE */
+#endif /* CONFIG_CKRM_TYPE_TASKCLASS */
#ifdef CONFIG_CKRM_RES_MEM
- struct list_head mm_peers; // list of tasks using same mm_struct
-#endif // CONFIG_CKRM_RES_MEM
-#endif // CONFIG_CKRM
- struct task_delay_info delays;
+ struct list_head mm_peers; /* list of tasks using same mm_struct */
+#endif
+#endif /* CONFIG_CKRM */
+#ifdef CONFIG_DELAY_ACCT
+ struct task_delay_info delays;
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
#endif
+/* API for registering delay info */
+#ifdef CONFIG_DELAY_ACCT
+
+#define test_delay_flag(tsk,flg) ((tsk)->flags & (flg))
+#define set_delay_flag(tsk,flg) ((tsk)->flags |= (flg))
+#define clear_delay_flag(tsk,flg) ((tsk)->flags &= ~(flg))
+
+#define def_delay_var(var) unsigned long long var
+#define get_delay(tsk,field) ((tsk)->delays.field)
+
+#define start_delay(var) ((var) = sched_clock())
+#define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock())
+
+#define inc_delay(tsk,field) (((tsk)->delays.field)++)
+
+/* Because of hardware timer drift on SMP, a task may continue on a different
+ * cpu than the one where start_ts was taken, so there is a possibility that
+ * end_ts < start_ts by some usecs. In this case we ignore the diff
+ * and add nothing to the total.
+ */
+#ifdef CONFIG_SMP
+#define test_ts_integrity(start_ts,end_ts) (likely((end_ts) > (start_ts)))
+#else
+#define test_ts_integrity(start_ts,end_ts) (1)
+#endif
+
+#define add_delay_ts(tsk,field,start_ts,end_ts) \
+ do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0)
+
+#define add_delay_clear(tsk,field,start_ts,flg) \
+ do { \
+ unsigned long long now = sched_clock();\
+ add_delay_ts(tsk,field,start_ts,now); \
+ clear_delay_flag(tsk,flg); \
+ } while (0)
+
+static inline void add_io_delay(unsigned long long dstart)
+{
+ struct task_struct * tsk = current;
+ unsigned long long now = sched_clock();
+ unsigned long long val;
+
+ if (test_ts_integrity(dstart,now))
+ val = now - dstart;
+ else
+ val = 0;
+ if (test_delay_flag(tsk,PF_MEMIO)) {
+ tsk->delays.mem_iowait_total += val;
+ tsk->delays.num_memwaits++;
+ } else {
+ tsk->delays.iowait_total += val;
+ tsk->delays.num_iowaits++;
+ }
+ clear_delay_flag(tsk,PF_IOWAIT);
+}
+
+inline static void init_delays(struct task_struct *tsk)
+{
+ memset((void*)&tsk->delays,0,sizeof(tsk->delays));
+}
+
+#else
+
+#define test_delay_flag(tsk,flg) (0)
+#define set_delay_flag(tsk,flg) do { } while (0)
+#define clear_delay_flag(tsk,flg) do { } while (0)
+
+#define def_delay_var(var)
+#define get_delay(tsk,field) (0)
+
+#define start_delay(var) do { } while (0)
+#define start_delay_set(var,flg) do { } while (0)
+
+#define inc_delay(tsk,field) do { } while (0)
+#define add_delay_ts(tsk,field,start_ts,now) do { } while (0)
+#define add_delay_clear(tsk,field,start_ts,flg) do { } while (0)
+#define add_io_delay(dstart) do { } while (0)
+#define init_delays(tsk) do { } while (0)
+#endif
+
#endif /* __KERNEL__ */
#endif
tristate "Null Tasks Resource Manager"
depends on CKRM_TYPE_TASKCLASS
default m
+
+config CKRM_RES_MEM
+ bool "Class based physical memory controller"
+ default y
+ depends on CKRM
+ help
+ Provide the basic support for collecting physical memory usage
+ information among classes. Say Y if you want to know the memory
+ usage of each class.
+
+config CKRM_TYPE_SOCKETCLASS
+ bool "Class Manager for socket groups"
+ depends on CKRM && RCFS_FS
help
Provides a Null Resource Controller for CKRM that is purely for
demonstration purposes.
Say N if unsure, Y to use the feature.
-config CKRM_RES_MEM
- bool "Class based physical memory controller"
- default y
- depends on CKRM
- help
- Provide the basic support for collecting physical memory usage information
- among classes. Say Y if you want to know the memory usage of each class.
-
-config CKRM_MEM_LRUORDER_CHANGE
- bool "Change the LRU ordering of scanned pages"
- default n
- depends on CKRM_RES_MEM
- help
- While trying to free pages, by default(n), scanned pages are left were they
- are found if they belong to relatively under-used class. In this case the
- LRU ordering of the memory subsystemis left intact. If this option is chosen,
- then the scanned pages are moved to the tail of the list(active or inactive).
- Changing this to yes reduces the checking overhead but violates the approximate
- LRU order that is maintained by the paging subsystem.
-
config CKRM_CPU_SCHEDULE_AT_BOOT
bool "Turn on at boot time"
depends on CKRM_CPU_SCHEDULE
obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o
obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o
obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o
-obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o
+obj-$(CONFIG_CKRM_RES_MEM) += ckrm_memcore.o ckrm_memctlr.o
obj-$(CONFIG_CKRM_RES_NULL) += ckrm_null_class.o
+++ /dev/null
-/* ckrm_mem.c - Memory Resource Manager for CKRM
- *
- * Copyright (C) Chandra Seetharaman, IBM Corp. 2004
- *
- * Provides a Memory Resource controller for CKRM
- *
- * Latest version, more details at http://ckrm.sf.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-#include <linux/cache.h>
-#include <linux/percpu.h>
-#include <linux/pagevec.h>
-#include <linux/parser.h>
-#include <linux/ckrm_mem_inline.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/errno.h>
-
-#define MEM_NAME "mem"
-
-#define CKRM_MEM_MAX_HIERARCHY 2 // allows only upto 2 levels - 0, 1 & 2
-
-/* all 1-level memory_share_class are chained together */
-LIST_HEAD(ckrm_memclass_list);
-LIST_HEAD(ckrm_shrink_list);
-spinlock_t ckrm_mem_lock; // protects both lists above
-unsigned int ckrm_tot_lru_pages; // total # of pages in the system
- // currently doesn't handle memory add/remove
-struct ckrm_mem_res *ckrm_mem_root_class;
-atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
-static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *);
-int ckrm_nr_mem_classes = 0;
-
-EXPORT_SYMBOL_GPL(ckrm_memclass_list);
-EXPORT_SYMBOL_GPL(ckrm_shrink_list);
-EXPORT_SYMBOL_GPL(ckrm_mem_lock);
-EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
-EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
-EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
-EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
-
-/* Initialize rescls values
- * May be called on each rcfs unmount or as part of error recovery
- * to make share values sane.
- * Does not traverse hierarchy reinitializing children.
- */
-
-void
-memclass_release(struct kref *kref)
-{
- struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users);
- BUG_ON(ckrm_memclass_valid(cls));
- kfree(cls);
-}
-EXPORT_SYMBOL_GPL(memclass_release);
-
-static void
-set_ckrm_tot_pages(void)
-{
- struct zone *zone;
- int tot_lru_pages = 0;
-
- for_each_zone(zone) {
- tot_lru_pages += zone->nr_active;
- tot_lru_pages += zone->nr_inactive;
- tot_lru_pages += zone->free_pages;
- }
- ckrm_tot_lru_pages = tot_lru_pages;
-}
-
-static void
-mem_res_initcls_one(struct ckrm_mem_res *res)
-{
- int zindex = 0;
- struct zone *zone;
-
- memset(res, 0, sizeof(struct ckrm_mem_res));
-
- res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
- res->shares.my_limit = CKRM_SHARE_DONTCARE;
- res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
- res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
- res->shares.cur_max_limit = 0;
-
- res->pg_guar = CKRM_SHARE_DONTCARE;
- res->pg_limit = CKRM_SHARE_DONTCARE;
-
- INIT_LIST_HEAD(&res->shrink_list);
- INIT_LIST_HEAD(&res->mcls_list);
-
- for_each_zone(zone) {
- INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
- INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
- INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
- res->ckrm_zone[zindex].nr_active = 0;
- res->ckrm_zone[zindex].nr_inactive = 0;
- res->ckrm_zone[zindex].zone = zone;
- res->ckrm_zone[zindex].memcls = res;
- zindex++;
- }
-
- res->pg_unused = 0;
- res->nr_dontcare = 1; // for default class
- kref_init(&res->nr_users);
-}
-
-static void
-set_impl_guar_children(struct ckrm_mem_res *parres)
-{
- ckrm_core_class_t *child = NULL;
- struct ckrm_mem_res *cres;
- int nr_dontcare = 1; // for defaultclass
- int guar, impl_guar;
- int resid = mem_rcbs.resid;
-
- ckrm_lock_hier(parres->core);
- while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
- cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
- // treat NULL cres as don't care as that child is just being
- // created.
- // FIXME: need a better way to handle this case.
- if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
- nr_dontcare++;
- }
- }
-
- parres->nr_dontcare = nr_dontcare;
- guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
- parres->impl_guar : parres->pg_unused;
- impl_guar = guar / parres->nr_dontcare;
-
- while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
- cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
- if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
- cres->impl_guar = impl_guar;
- set_impl_guar_children(cres);
- }
- }
- ckrm_unlock_hier(parres->core);
-
-}
-
-void
-check_memclass(struct ckrm_mem_res *res, char *str)
-{
- int i, act = 0, inact = 0;
- struct zone *zone;
- struct ckrm_zone *ckrm_zone;
- struct list_head *pos;
- struct page *page;
-
- printk("Check<%s> %s: total=%d\n",
- str, res->core->name, atomic_read(&res->pg_total));
- for (i = 0; i < MAX_NR_ZONES; i++) {
- act = 0; inact = 0;
- ckrm_zone = &res->ckrm_zone[i];
- zone = ckrm_zone->zone;
- spin_lock_irq(&zone->lru_lock);
- pos = ckrm_zone->inactive_list.next;
- while (pos != &ckrm_zone->inactive_list) {
- page = list_entry(pos, struct page, lru);
- pos = pos->next;
- inact++;
- }
- pos = ckrm_zone->active_list.next;
- while (pos != &ckrm_zone->active_list) {
- page = list_entry(pos, struct page, lru);
- pos = pos->next;
- act++;
- }
- spin_unlock_irq(&zone->lru_lock);
- printk("Check<%s>(zone=%d): act %ld, inae %ld lact %d lina %d\n",
- str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive,
- act, inact);
- }
-}
-EXPORT_SYMBOL_GPL(check_memclass);
-
-static void *
-mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
-{
- struct ckrm_mem_res *res, *pres;
-
- if (mem_rcbs.resid == -1) {
- return NULL;
- }
-
- pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
- if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
- printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n",
- CKRM_MEM_MAX_HIERARCHY);
- return NULL;
- }
-
- if (unlikely((parent == NULL) && (ckrm_mem_root_class != NULL))) {
- printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
- return NULL;
- }
-
- if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) {
- printk(KERN_ERR "MEM_RC: child class with no root class!!");
- return NULL;
- }
-
- res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
-
- if (res) {
- mem_res_initcls_one(res);
- res->core = core;
- res->parent = parent;
- spin_lock_irq(&ckrm_mem_lock);
- list_add(&res->mcls_list, &ckrm_memclass_list);
- spin_unlock_irq(&ckrm_mem_lock);
- if (parent == NULL) {
- // I am part of the root class. So, set the max to
- // number of pages available
- res->pg_guar = ckrm_tot_lru_pages;
- res->pg_unused = ckrm_tot_lru_pages;
- res->pg_limit = ckrm_tot_lru_pages;
- res->hier = 0;
- ckrm_mem_root_class = res;
- } else {
- int guar;
- res->hier = pres->hier + 1;
- set_impl_guar_children(pres);
- guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
- pres->impl_guar : pres->pg_unused;
- res->impl_guar = guar / pres->nr_dontcare;
- }
- ckrm_nr_mem_classes++;
- }
- else
- printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
- return res;
-}
-
-/*
- * It is the caller's responsibility to make sure that the parent only
- * has chilren that are to be accounted. i.e if a new child is added
- * this function should be called after it has been added, and if a
- * child is deleted this should be called after the child is removed.
- */
-static void
-child_maxlimit_changed_local(struct ckrm_mem_res *parres)
-{
- int maxlimit = 0;
- struct ckrm_mem_res *childres;
- ckrm_core_class_t *child = NULL;
-
- // run thru parent's children and get the new max_limit of the parent
- ckrm_lock_hier(parres->core);
- while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
- childres = ckrm_get_res_class(child, mem_rcbs.resid,
- struct ckrm_mem_res);
- if (maxlimit < childres->shares.my_limit) {
- maxlimit = childres->shares.my_limit;
- }
- }
- ckrm_unlock_hier(parres->core);
- parres->shares.cur_max_limit = maxlimit;
-}
-
-/*
- * Recalculate the guarantee and limit in # of pages... and propagate the
- * same to children.
- * Caller is responsible for protecting res and for the integrity of parres
- */
-static void
-recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
-{
- ckrm_core_class_t *child = NULL;
- struct ckrm_mem_res *cres;
- int resid = mem_rcbs.resid;
- struct ckrm_shares *self = &res->shares;
-
- if (parres) {
- struct ckrm_shares *par = &parres->shares;
-
- // calculate pg_guar and pg_limit
- //
- if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
- self->my_guarantee == CKRM_SHARE_DONTCARE) {
- res->pg_guar = CKRM_SHARE_DONTCARE;
- } else if (par->total_guarantee) {
- u64 temp = (u64) self->my_guarantee * parres->pg_guar;
- do_div(temp, par->total_guarantee);
- res->pg_guar = (int) temp;
- res->impl_guar = CKRM_SHARE_DONTCARE;
- } else {
- res->pg_guar = 0;
- res->impl_guar = CKRM_SHARE_DONTCARE;
- }
-
- if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
- self->my_limit == CKRM_SHARE_DONTCARE) {
- res->pg_limit = CKRM_SHARE_DONTCARE;
- } else if (par->max_limit) {
- u64 temp = (u64) self->my_limit * parres->pg_limit;
- do_div(temp, par->max_limit);
- res->pg_limit = (int) temp;
- } else {
- res->pg_limit = 0;
- }
- }
-
- // Calculate unused units
- if (res->pg_guar == CKRM_SHARE_DONTCARE) {
- res->pg_unused = CKRM_SHARE_DONTCARE;
- } else if (self->total_guarantee) {
- u64 temp = (u64) self->unused_guarantee * res->pg_guar;
- do_div(temp, self->total_guarantee);
- res->pg_unused = (int) temp;
- } else {
- res->pg_unused = 0;
- }
-
- // propagate to children
- ckrm_lock_hier(res->core);
- while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
- cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
- recalc_and_propagate(cres, res);
- }
- ckrm_unlock_hier(res->core);
- return;
-}
-
-static void
-mem_res_free(void *my_res)
-{
- struct ckrm_mem_res *res = my_res;
- struct ckrm_mem_res *pres;
-
- if (!res)
- return;
-
- ckrm_mem_evaluate_all_pages(res);
-
- pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
- struct ckrm_mem_res);
-
- if (pres) {
- child_guarantee_changed(&pres->shares,
- res->shares.my_guarantee, 0);
- child_maxlimit_changed_local(pres);
- recalc_and_propagate(pres, NULL);
- set_impl_guar_children(pres);
- }
-
- res->shares.my_guarantee = 0;
- res->shares.my_limit = 0;
- res->pg_guar = 0;
- res->pg_limit = 0;
- res->pg_unused = 0;
-
- spin_lock_irq(&ckrm_mem_lock);
- list_del_init(&res->mcls_list);
- spin_unlock_irq(&ckrm_mem_lock);
-
- res->core = NULL;
- res->parent = NULL;
- kref_put(&res->nr_users, memclass_release);
- ckrm_nr_mem_classes--;
- return;
-}
-
-static int
-mem_set_share_values(void *my_res, struct ckrm_shares *shares)
-{
- struct ckrm_mem_res *res = my_res;
- struct ckrm_mem_res *parres;
- int rc;
-
- if (!res)
- return -EINVAL;
-
- parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
- struct ckrm_mem_res);
-
- rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);
-
- if ((rc == 0) && (parres != NULL)) {
- child_maxlimit_changed_local(parres);
- recalc_and_propagate(parres, NULL);
- set_impl_guar_children(parres);
- }
-
- return rc;
-}
-
-static int
-mem_get_share_values(void *my_res, struct ckrm_shares *shares)
-{
- struct ckrm_mem_res *res = my_res;
-
- if (!res)
- return -EINVAL;
- *shares = res->shares;
- return 0;
-}
-
-static int
-mem_get_stats(void *my_res, struct seq_file *sfile)
-{
- struct ckrm_mem_res *res = my_res;
- struct zone *zone;
- int active = 0, inactive = 0, fr = 0;
-
- if (!res)
- return -EINVAL;
-
- seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
- if (res == ckrm_mem_root_class) {
- int i = 0;
- for_each_zone(zone) {
- active += zone->nr_active;
- inactive += zone->nr_inactive;
- fr += zone->free_pages;
- i++;
- }
- seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d"
- ",free=%d\n", ckrm_tot_lru_pages,
- active, inactive, fr);
- }
- seq_printf(sfile, "Number of pages used(including pages lent to"
- " children): %d\n", atomic_read(&res->pg_total));
- seq_printf(sfile, "Number of pages guaranteed: %d\n",
- res->pg_guar);
- seq_printf(sfile, "Maximum limit of pages: %d\n",
- res->pg_limit);
- seq_printf(sfile, "Total number of pages available"
- "(after serving guarantees to children): %d\n",
- res->pg_unused);
- seq_printf(sfile, "Number of pages lent to children: %d\n",
- res->pg_lent);
- seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
- res->pg_borrowed);
- seq_printf(sfile, "---------- Memory Resource stats end ----------\n");
-
- return 0;
-}
-
-static void
-mem_change_resclass(void *tsk, void *old, void *new)
-{
- struct mm_struct *mm;
- struct task_struct *task = tsk, *t1;
- struct ckrm_mem_res *prev_mmcls;
-
- if (!task->mm || (new == old) || (old == (void *) -1))
- return;
-
- mm = task->active_mm;
- spin_lock(&mm->peertask_lock);
- prev_mmcls = mm->memclass;
-
- if (new == NULL) {
- list_del_init(&task->mm_peers);
- } else {
- int found = 0;
- list_for_each_entry(t1, &mm->tasklist, mm_peers) {
- if (t1 == task) {
- found++;
- break;
- }
- }
- if (!found) {
- list_del_init(&task->mm_peers);
- list_add_tail(&task->mm_peers, &mm->tasklist);
- }
- }
-
- spin_unlock(&mm->peertask_lock);
- ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new);
- return;
-}
-
-#define MEM_FAIL_OVER "fail_over"
-#define MEM_SHRINK_AT "shrink_at"
-#define MEM_SHRINK_TO "shrink_to"
-#define MEM_SHRINK_COUNT "num_shrinks"
-#define MEM_SHRINK_INTERVAL "shrink_interval"
-
-int ckrm_mem_fail_over = 110;
-int ckrm_mem_shrink_at = 90;
-static int ckrm_mem_shrink_to = 80;
-static int ckrm_mem_shrink_count = 10;
-static int ckrm_mem_shrink_interval = 10;
-
-EXPORT_SYMBOL_GPL(ckrm_mem_fail_over);
-EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
-
-static int
-mem_show_config(void *my_res, struct seq_file *sfile)
-{
- struct ckrm_mem_res *res = my_res;
-
- if (!res)
- return -EINVAL;
-
- seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
- MEM_NAME,
- MEM_FAIL_OVER, ckrm_mem_fail_over,
- MEM_SHRINK_AT, ckrm_mem_shrink_at,
- MEM_SHRINK_TO, ckrm_mem_shrink_to,
- MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
- MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
-
- return 0;
-}
-
-// config file is available only at the root level,
-// so assuming my_res to be the system level class
-enum memclass_token {
- mem_fail_over,
- mem_shrink_at,
- mem_shrink_to,
- mem_shrink_count,
- mem_shrink_interval,
- mem_err
-};
-
-static match_table_t mem_tokens = {
- {mem_fail_over, MEM_FAIL_OVER "=%d"},
- {mem_shrink_at, MEM_SHRINK_AT "=%d"},
- {mem_shrink_to, MEM_SHRINK_TO "=%d"},
- {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
- {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
- {mem_err, NULL},
-};
-
-static int
-mem_set_config(void *my_res, const char *cfgstr)
-{
- char *p;
- struct ckrm_mem_res *res = my_res;
- int err = 0, val;
-
- if (!res)
- return -EINVAL;
-
- while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, mem_tokens, args);
- switch (token) {
- case mem_fail_over:
- if (match_int(args, &val) || (val <= 0)) {
- err = -EINVAL;
- } else {
- ckrm_mem_fail_over = val;
- }
- break;
- case mem_shrink_at:
- if (match_int(args, &val) || (val <= 0)) {
- err = -EINVAL;
- } else {
- ckrm_mem_shrink_at = val;
- }
- break;
- case mem_shrink_to:
- if (match_int(args, &val) || (val < 0) || (val > 100)) {
- err = -EINVAL;
- } else {
- ckrm_mem_shrink_to = val;
- }
- break;
- case mem_shrink_count:
- if (match_int(args, &val) || (val <= 0)) {
- err = -EINVAL;
- } else {
- ckrm_mem_shrink_count = val;
- }
- break;
- case mem_shrink_interval:
- if (match_int(args, &val) || (val <= 0)) {
- err = -EINVAL;
- } else {
- ckrm_mem_shrink_interval = val;
- }
- break;
- default:
- err = -EINVAL;
- }
- }
- return err;
-}
-
-static int
-mem_reset_stats(void *my_res)
-{
- struct ckrm_mem_res *res = my_res;
- printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
- res->core->name);
- return 0;
-}
-
-struct ckrm_res_ctlr mem_rcbs = {
- .res_name = MEM_NAME,
- .res_hdepth = CKRM_MEM_MAX_HIERARCHY,
- .resid = -1,
- .res_alloc = mem_res_alloc,
- .res_free = mem_res_free,
- .set_share_values = mem_set_share_values,
- .get_share_values = mem_get_share_values,
- .get_stats = mem_get_stats,
- .change_resclass = mem_change_resclass,
- .show_config = mem_show_config,
- .set_config = mem_set_config,
- .reset_stats = mem_reset_stats,
-};
-
-EXPORT_SYMBOL_GPL(mem_rcbs);
-
-int __init
-init_ckrm_mem_res(void)
-{
- struct ckrm_classtype *clstype;
- int resid = mem_rcbs.resid;
-
- set_ckrm_tot_pages();
- spin_lock_init(&ckrm_mem_lock);
- clstype = ckrm_find_classtype_by_name("taskclass");
- if (clstype == NULL) {
- printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
- return -ENOENT;
- }
-
- if (resid == -1) {
- resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
- if (resid != -1) {
- mem_rcbs.classtype = clstype;
- }
- }
- return ((resid < 0) ? resid : 0);
-}
-
-void __exit
-exit_ckrm_mem_res(void)
-{
- ckrm_unregister_res_ctlr(&mem_rcbs);
- mem_rcbs.resid = -1;
-}
-
-module_init(init_ckrm_mem_res)
-module_exit(exit_ckrm_mem_res)
-
-int
-ckrm_mem_get_shrink_to(void)
-{
- return ckrm_mem_shrink_to;
-}
-
-void
-ckrm_at_limit(struct ckrm_mem_res *cls)
-{
- struct zone *zone;
- unsigned long now = jiffies;
-
- if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
- ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) {
- return;
- }
- if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
- (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
- cls->last_shrink = now;
- cls->shrink_count = 0;
- }
- cls->shrink_count++;
- if (cls->shrink_count > ckrm_mem_shrink_count) {
- return;
- }
- spin_lock_irq(&ckrm_mem_lock);
- list_add(&cls->shrink_list, &ckrm_shrink_list);
- spin_unlock_irq(&ckrm_mem_lock);
- cls->flags |= MEM_AT_LIMIT;
- for_each_zone(zone) {
- wakeup_kswapd(zone);
- break; // only once is enough
- }
-}
-
-static int
-ckrm_mem_evaluate_page_anon(struct page* page)
-{
- struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
- struct ckrm_mem_res* maxshareclass = NULL;
- struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
- struct vm_area_struct *vma;
- struct mm_struct* mm;
- int ret = 0;
-
- spin_lock(&anon_vma->lock);
- BUG_ON(list_empty(&anon_vma->head));
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
- mm = vma->vm_mm;
- if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
- mm->memclass) < 0) {
- maxshareclass = mm->memclass;
- }
- }
- spin_unlock(&anon_vma->lock);
-
- if (!maxshareclass) {
- maxshareclass = ckrm_mem_root_class;
- }
- if (pgcls != maxshareclass) {
- ckrm_change_page_class(page, maxshareclass);
- ret = 1;
- }
- return ret;
-}
-
-static int
-ckrm_mem_evaluate_page_file(struct page* page)
-{
- struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
- struct ckrm_mem_res* maxshareclass = NULL;
- struct address_space *mapping = page->mapping;
- struct vm_area_struct *vma = NULL;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
- struct prio_tree_iter iter;
- struct mm_struct* mm;
- int ret = 0;
-
- if (!mapping)
- return 0;
-
- if (!spin_trylock(&mapping->i_mmap_lock))
- return 0;
-
- vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
- pgoff, pgoff) {
- mm = vma->vm_mm;
- if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
- mm->memclass)<0)
- maxshareclass = mm->memclass;
- }
- spin_unlock(&mapping->i_mmap_lock);
-
- if (!maxshareclass) {
- maxshareclass = ckrm_mem_root_class;
- }
- if (pgcls != maxshareclass) {
- ckrm_change_page_class(page, maxshareclass);
- ret = 1;
- }
- return ret;
-}
-
-static int
-ckrm_mem_evaluate_page(struct page* page)
-{
- int ret = 0;
- BUG_ON(page->ckrm_zone == NULL);
- if (page->mapping) {
- if (PageAnon(page))
- ret = ckrm_mem_evaluate_page_anon(page);
- else
- ret = ckrm_mem_evaluate_page_file(page);
- }
- return ret;
-}
-
-static void
-ckrm_mem_evaluate_all_pages(struct ckrm_mem_res* res)
-{
- struct page *page;
- struct ckrm_zone *ckrm_zone;
- struct zone *zone;
- struct list_head *pos, *next;
- int i;
-
- check_memclass(res, "bef_eval_all_pgs");
- for (i = 0; i < MAX_NR_ZONES; i++) {
- ckrm_zone = &res->ckrm_zone[i];
- zone = ckrm_zone->zone;
- spin_lock_irq(&zone->lru_lock);
- pos = ckrm_zone->inactive_list.next;
- while (pos != &ckrm_zone->inactive_list) {
- next = pos->next;
- page = list_entry(pos, struct page, lru);
- if (!ckrm_mem_evaluate_page(page))
- ckrm_change_page_class(page,
- ckrm_mem_root_class);
- pos = next;
- }
- pos = ckrm_zone->active_list.next;
- while (pos != &ckrm_zone->active_list) {
- next = pos->next;
- page = list_entry(pos, struct page, lru);
- if (!ckrm_mem_evaluate_page(page))
- ckrm_change_page_class(page,
- ckrm_mem_root_class);
- pos = next;
- }
- spin_unlock_irq(&zone->lru_lock);
- }
- check_memclass(res, "aft_eval_all_pgs");
- return;
-}
-
-static inline int
-class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma,
- pmd_t* pmdir, unsigned long address, unsigned long end)
-{
- pte_t *pte;
- unsigned long pmd_end;
-
- if (pmd_none(*pmdir))
- return 0;
- BUG_ON(pmd_bad(*pmdir));
-
- pmd_end = (address+PMD_SIZE)&PMD_MASK;
- if (end>pmd_end)
- end = pmd_end;
-
- do {
- pte = pte_offset_map(pmdir,address);
- if (pte_present(*pte)) {
- struct page *page = pte_page(*pte);
- BUG_ON(mm->memclass == NULL);
- if (page->mapping && page->ckrm_zone) {
- struct zone *zone = page->ckrm_zone->zone;
- spin_lock_irq(&zone->lru_lock);
- ckrm_change_page_class(page, mm->memclass);
- spin_unlock_irq(&zone->lru_lock);
- }
- }
- address += PAGE_SIZE;
- pte_unmap(pte);
- pte++;
- } while(address && (address<end));
- return 0;
-}
-
-static inline int
-class_migrate_pgd(struct mm_struct* mm, struct vm_area_struct* vma,
- pgd_t* pgdir, unsigned long address, unsigned long end)
-{
- pmd_t* pmd;
- unsigned long pgd_end;
-
- if (pgd_none(*pgdir))
- return 0;
- BUG_ON(pgd_bad(*pgdir));
-
- pmd = pmd_offset(pgdir,address);
- pgd_end = (address+PGDIR_SIZE)&PGDIR_MASK;
-
- if (pgd_end && (end>pgd_end))
- end = pgd_end;
-
- do {
- class_migrate_pmd(mm,vma,pmd,address,end);
- address = (address+PMD_SIZE)&PMD_MASK;
- pmd++;
- } while (address && (address<end));
- return 0;
-}
-
-static inline int
-class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
-{
- pgd_t* pgdir;
- unsigned long address, end;
-
- address = vma->vm_start;
- end = vma->vm_end;
-
- pgdir = pgd_offset(vma->vm_mm, address);
- do {
- class_migrate_pgd(mm,vma,pgdir,address,end);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- pgdir++;
- } while(address && (address<end));
- return 0;
-}
-
-/* this function is called with mm->peertask_lock hold */
-void
-ckrm_mem_evaluate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
-{
- struct task_struct *task;
- struct ckrm_mem_res *maxshareclass = def;
- struct vm_area_struct *vma;
-
- if (list_empty(&mm->tasklist)) {
- /* We leave the mm->memclass untouched since we believe that one
- * mm with no task associated will be deleted soon or attach
- * with another task later.
- */
- return;
- }
-
- list_for_each_entry(task, &mm->tasklist, mm_peers) {
- struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
- if (!cls)
- continue;
- if (!maxshareclass ||
- ckrm_mem_share_compare(maxshareclass,cls)<0 )
- maxshareclass = cls;
- }
-
- if (maxshareclass && (mm->memclass != maxshareclass)) {
- if (mm->memclass) {
- kref_put(&mm->memclass->nr_users, memclass_release);
- }
- mm->memclass = maxshareclass;
- kref_get(&maxshareclass->nr_users);
-
- /* Go through all VMA to migrate pages */
- down_read(&mm->mmap_sem);
- vma = mm->mmap;
- while(vma) {
- class_migrate_vma(mm, vma);
- vma = vma->vm_next;
- }
- up_read(&mm->mmap_sem);
- }
- return;
-}
-
-void
-ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task)
-{
- spin_lock(&mm->peertask_lock);
- if (!list_empty(&task->mm_peers)) {
- printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
- list_del_init(&task->mm_peers);
- }
- list_add_tail(&task->mm_peers, &mm->tasklist);
- spin_unlock(&mm->peertask_lock);
- if (mm->memclass != ckrm_get_mem_class(task))
- ckrm_mem_evaluate_mm(mm, NULL);
- return;
-}
-
-int
-ckrm_memclass_valid(struct ckrm_mem_res *cls)
-{
- struct ckrm_mem_res *tmp;
- unsigned long flags;
-
- if (!cls || list_empty(&cls->mcls_list)) {
- return 0;
- }
- spin_lock_irqsave(&ckrm_mem_lock, flags);
- list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
- if (tmp == cls) {
- spin_unlock(&ckrm_mem_lock);
- return 1;
- }
- }
- spin_unlock_irqrestore(&ckrm_mem_lock, flags);
- return 0;
-}
-
-MODULE_LICENSE("GPL");
task_lock(tsk);
tsk->mm = NULL;
up_read(&mm->mmap_sem);
- ckrm_task_clear_mm(tsk, mm);
+ ckrm_task_mm_clear(tsk, mm);
enter_lazy_tlb(mm, current);
task_unlock(tsk);
mmput(mm);
#include <linux/rmap.h>
#include <linux/ckrm_events.h>
#include <linux/ckrm_tsk.h>
+#include <linux/ckrm_tc.h>
#include <linux/ckrm_mem_inline.h>
#include <linux/vs_network.h>
#include <linux/vs_limit.h>
mm->ioctx_list = NULL;
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
mm->free_area_cache = TASK_UNMAPPED_BASE;
- ckrm_mm_init(mm);
+ ckrm_mm_init(mm);
if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
ckrm_mm_setclass(mm, oldmm->memclass);
tsk->mm = mm;
tsk->active_mm = mm;
- ckrm_init_mm_to_task(mm, tsk);
+ ckrm_mm_setclass(mm, oldmm->memclass);
+ ckrm_task_mm_set(mm, tsk);
return 0;
free_pt:
#include <linux/vs_base.h>
#include <linux/vs_limit.h>
#include <linux/nodemask.h>
+#include <linux/ckrm_mem_inline.h>
#include <asm/tlbflush.h>
/* have to delete it as __free_pages_bulk list manipulates */
list_del(&page->lru);
__free_pages_bulk(page, base, zone, area, order);
- ckrm_clear_page_class(page);
+ ckrm_clear_page_class(page);
ret++;
}
spin_unlock_irqrestore(&zone->lock, flags);
#endif
1 << PG_checked | 1 << PG_mappedtodisk);
page->private = 0;
-#ifdef CONFIG_CKRM_RES_MEM
- page->ckrm_zone = NULL;
-#endif
+ ckrm_page_init(page);
set_page_refs(page, order);
}
*/
can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
- if (!ckrm_class_limit_ok((ckrm_get_mem_class(current)))) {
+ if (!in_interrupt() && !ckrm_class_limit_ok(ckrm_get_mem_class(p)))
return NULL;
- }
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
}
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
zone_names[j], realsize, batch);
-#ifndef CONFIG_CKRM_RES_MEM
- INIT_LIST_HEAD(&zone->active_list);
- INIT_LIST_HEAD(&zone->inactive_list);
-#endif
+ ckrm_init_lists(zone);
zone->nr_scan_active = 0;
zone->nr_scan_inactive = 0;
zone->nr_active = 0;
spin_lock_irqsave(&zone->lru_lock, flags);
if (PageLRU(page) && !PageActive(page)) {
list_del(&page->lru);
-#ifdef CONFIG_CKRM_RES_MEM
- list_add_tail(&page->lru, &ckrm_zone->inactive_list);
-#else
- list_add_tail(&page->lru, &zone->inactive_list);
-#endif
+ ckrm_add_tail_inactive(page);
inc_page_state(pgrotated);
}
if (!test_clear_page_writeback(page))
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
+#include <linux/ckrm_mem.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
nr_taken++;
}
zone->nr_inactive -= nr_taken;
- ckrm_zone_dec_inactive(ckrm_zone, nr_taken);
+ ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
BUG();
list_del(&page->lru);
if (PageActive(page)) {
- ckrm_zone_inc_active(ckrm_zone, 1);
+ ckrm_zone_add_active(ckrm_zone, 1);
zone->nr_active++;
list_add(&page->lru, active_list);
} else {
- ckrm_zone_inc_inactive(ckrm_zone, 1);
+ ckrm_zone_add_inactive(ckrm_zone, 1);
zone->nr_inactive++;
list_add(&page->lru, inactive_list);
}
}
zone->pages_scanned += pgscanned;
zone->nr_active -= pgmoved;
- ckrm_zone_dec_active(ckrm_zone, pgmoved);
+ ckrm_zone_sub_active(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
/*
list_move(&page->lru, inactive_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
- ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
zone->nr_inactive += pgmoved;
+ ckrm_zone_add_inactive(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pgdeactivate += pgmoved;
pgmoved = 0;
spin_lock_irq(&zone->lru_lock);
}
}
- ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
zone->nr_inactive += pgmoved;
+ ckrm_zone_add_inactive(ckrm_zone, pgmoved);
pgdeactivate += pgmoved;
if (buffer_heads_over_limit) {
spin_unlock_irq(&zone->lru_lock);
list_move(&page->lru, active_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
- ckrm_zone_inc_active(ckrm_zone, pgmoved);
zone->nr_active += pgmoved;
+ ckrm_zone_add_active(ckrm_zone, pgmoved);
pgmoved = 0;
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
spin_lock_irq(&zone->lru_lock);
}
}
- ckrm_zone_inc_active(ckrm_zone, pgmoved);
zone->nr_active += pgmoved;
+ ckrm_zone_add_active(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
}
#ifdef CONFIG_CKRM_RES_MEM
-static int
-shrink_weight(struct ckrm_zone *czone)
-{
- u64 temp;
- struct zone *zone = czone->zone;
- struct ckrm_mem_res *cls = czone->memcls;
- int zone_usage, zone_guar, zone_total, guar, ret, cnt;
-
- zone_usage = czone->nr_active + czone->nr_inactive;
- czone->active_over = czone->inactive_over = 0;
-
- if (zone_usage < SWAP_CLUSTER_MAX * 4)
- return 0;
-
- if (cls->pg_guar == CKRM_SHARE_DONTCARE) {
- // no guarantee for this class. use implicit guarantee
- guar = cls->impl_guar / cls->nr_dontcare;
- } else {
- guar = cls->pg_unused / cls->nr_dontcare;
- }
- zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
- temp = (u64) guar * zone_total;
- do_div(temp, ckrm_tot_lru_pages);
- zone_guar = (int) temp;
-
- ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
- (zone_usage - zone_guar) : 0;
- if (ret) {
- cnt = czone->nr_active - (2 * zone_guar / 3);
- if (cnt > 0)
- czone->active_over = cnt;
- cnt = czone->active_over + czone->nr_inactive
- - zone_guar / 3;
- if (cnt > 0)
- czone->inactive_over = cnt;
- }
- return ret;
-}
-
static void
shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
{
break;
}
}
-
- throttle_vm_writeout();
}
}
-/* insert an entry to the list and sort decendently*/
+/*
+ * Shrink the class 'cls' towards ckrm_mem_shrink_to percent of its limit.
+ *
+ * For every zone, the class' share of that zone is computed as
+ * pg_limit * (pages in zone) / ckrm_tot_lru_pages, scaled by
+ * ckrm_mem_shrink_to / 100. Two thirds of that share is allowed on the
+ * active list and one third on the inactive list; anything above is
+ * recorded in shrink_active / shrink_inactive and handed to
+ * shrink_ckrmzone().
+ *
+ * A zone that is below its share leaves a (negative) credit that is
+ * carried over to the following zones; the credit is reset once a zone
+ * that is over its share has consumed it.
+ *
+ * zone->prev_priority is overridden with sc.priority while the zone is
+ * being worked on and restored from zone->temp_priority afterwards.
+ *
+ * NOTE(review): 'sc' is only partially initialised (nr_mapped, nr_scanned,
+ * nr_reclaimed, priority, nr_to_reclaim). Confirm shrink_ckrmzone() does
+ * not read any other scan_control field.
+ *
+ * FIXME: This function needs to be given more thought.
+ */
static void
-list_add_sort(struct list_head *entry, struct list_head *head)
+ckrm_shrink_class(struct ckrm_mem_res *cls)
{
- struct ckrm_zone *czone, *new =
- list_entry(entry, struct ckrm_zone, victim_list);
- struct list_head* pos = head->next;
-
- while (pos != head) {
- czone = list_entry(pos, struct ckrm_zone, victim_list);
- if (new->shrink_weight > czone->shrink_weight) {
- __list_add(entry, pos->prev, pos);
- return;
- }
- pos = pos->next;
- }
- list_add_tail(entry, head);
- return;
-}
+ struct scan_control sc;
+ struct zone *zone;
+ int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
-static void
-shrink_choose_victims(struct list_head *victims,
- unsigned long nr_active, unsigned long nr_inactive)
-{
- unsigned long nr;
- struct ckrm_zone* czone;
- struct list_head *pos, *next;
-
- pos = victims->next;
- while ((pos != victims) && (nr_active || nr_inactive)) {
- czone = list_entry(pos, struct ckrm_zone, victim_list);
-
- if (nr_active && czone->active_over) {
- nr = min(nr_active, czone->active_over);
- czone->shrink_active += nr;
- czone->active_over -= nr;
- nr_active -= nr;
+ sc.nr_mapped = read_page_state(nr_mapped);
+ sc.nr_scanned = 0;
+ sc.nr_reclaimed = 0;
+ sc.priority = 0; // always very high priority
+
+ for_each_zone(zone) {
+ int zone_total, zone_limit, active_limit,
+ inactive_limit, clszone_limit;
+ struct ckrm_zone *czone;
+ u64 temp;
+
+ czone = &cls->ckrm_zone[zindex];
+ /*
+ * Advance the index right away: the 'continue' below must not
+ * leave zindex behind, otherwise every following zone would be
+ * paired with the wrong ckrm_zone entry.
+ */
+ zindex++;
+ /* somebody else is already shrinking this ckrm_zone */
+ if (ckrm_test_set_shrink(czone))
+ continue;
+
+ zone->temp_priority = zone->prev_priority;
+ zone->prev_priority = sc.priority;
+
+ zone_total = zone->nr_active + zone->nr_inactive
+ + zone->free_pages;
+
+ temp = (u64) cls->pg_limit * zone_total;
+ do_div(temp, ckrm_tot_lru_pages);
+ zone_limit = (int) temp;
+ clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+ active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
+ inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
+
+ czone->shrink_active = 0;
+ cnt = czone->nr_active + act_credit - active_limit;
+ if (cnt > 0) {
+ czone->shrink_active = (unsigned long) cnt;
+ act_credit = 0;
+ } else {
+ act_credit += cnt;
}
- if (nr_inactive && czone->inactive_over) {
- nr = min(nr_inactive, czone->inactive_over);
- czone->shrink_inactive += nr;
- czone->inactive_over -= nr;
- nr_inactive -= nr;
+ czone->shrink_inactive = 0;
+ cnt = czone->shrink_active + inact_credit +
+ (czone->nr_inactive - inactive_limit);
+ if (cnt > 0) {
+ czone->shrink_inactive = (unsigned long) cnt;
+ inact_credit = 0;
+ } else {
+ inact_credit += cnt;
}
- pos = pos->next;
- }
- pos = victims->next;
- while (pos != victims) {
- czone = list_entry(pos, struct ckrm_zone, victim_list);
- next = pos->next;
- if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
- list_del_init(pos);
- ckrm_clear_shrink(czone);
+
+ if (czone->shrink_active || czone->shrink_inactive) {
+ sc.nr_to_reclaim = czone->shrink_inactive;
+ shrink_ckrmzone(czone, &sc);
}
- pos = next;
- }
- return;
+ zone->prev_priority = zone->temp_priority;
+ ckrm_clear_shrink(czone);
+ }
}
+/*
+ * Shrink every class currently queued on ckrm_shrink_list.
+ *
+ * Each class is unlinked from the list and has CLS_AT_LIMIT cleared while
+ * ckrm_mem_lock is held; the lock is then dropped around the call to
+ * ckrm_shrink_class() and re-taken before the list is examined again.
+ */
static void
-shrink_get_victims(struct zone *zone, unsigned long nr_active,
- unsigned long nr_inactive, struct list_head *victims)
+ckrm_shrink_classes(void)
{
- struct list_head *pos;
struct ckrm_mem_res *cls;
- struct ckrm_zone *czone;
- int zoneindex = zone_idx(zone);
-
- if (ckrm_nr_mem_classes <= 1) {
- if (ckrm_mem_root_class) {
- czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
- if (!ckrm_test_set_shrink(czone)) {
- list_add(&czone->victim_list, victims);
- czone->shrink_active = nr_active;
- czone->shrink_inactive = nr_inactive;
- }
- }
- return;
- }
- spin_lock_irq(&ckrm_mem_lock);
- list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
- czone = cls->ckrm_zone + zoneindex;
- if (ckrm_test_set_shrink(czone))
- continue;
- czone->shrink_active = 0;
- czone->shrink_inactive = 0;
- czone->shrink_weight = shrink_weight(czone);
- if (czone->shrink_weight) {
- list_add_sort(&czone->victim_list, victims);
- } else {
- ckrm_clear_shrink(czone);
- }
- }
- pos = victims->next;
- while (pos != victims) {
- czone = list_entry(pos, struct ckrm_zone, victim_list);
- pos = pos->next;
- }
- shrink_choose_victims(victims, nr_active, nr_inactive);
- spin_unlock_irq(&ckrm_mem_lock);
- pos = victims->next;
- while (pos != victims) {
- czone = list_entry(pos, struct ckrm_zone, victim_list);
- pos = pos->next;
+ spin_lock(&ckrm_mem_lock);
+ while (!ckrm_shrink_list_empty()) {
+ /* always take the first queued class */
+ cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+ shrink_list);
+ list_del(&cls->shrink_list);
+ cls->flags &= ~CLS_AT_LIMIT;
+ /* the shrink itself runs without ckrm_mem_lock held */
+ spin_unlock(&ckrm_mem_lock);
+ ckrm_shrink_class(cls);
+ spin_lock(&ckrm_mem_lock);
}
+ spin_unlock(&ckrm_mem_lock);
}
-#endif /* CONFIG_CKRM_RES_MEM */
+
+#else
+/* without CONFIG_CKRM_RES_MEM the call expands to an empty statement */
+#define ckrm_shrink_classes() do { } while(0)
+#endif /* CONFIG_CKRM_RES_MEM */
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
czone = list_entry(pos, struct ckrm_zone, victim_list);
next = pos->next;
list_del_init(pos);
- ckrm_clear_shrink(czone);
sc->nr_to_reclaim = czone->shrink_inactive;
shrink_ckrmzone(czone, sc);
+ ckrm_clear_shrink(czone);
pos = next;
}
}
#endif
}
-#ifdef CONFIG_CKRM_RES_MEM
-// This function needs to be given more thought.
-// Shrink the class to be at shrink_to%" of its limit
-static void
-ckrm_shrink_class(struct ckrm_mem_res *cls)
-{
- struct scan_control sc;
- struct zone *zone;
- int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
- int shrink_to = ckrm_mem_get_shrink_to();
-
- sc.nr_mapped = read_page_state(nr_mapped);
- sc.nr_scanned = 0;
- sc.nr_reclaimed = 0;
- sc.priority = 0; // always very high priority
-
- check_memclass(cls, "bef_shnk_cls");
- for_each_zone(zone) {
- int zone_total, zone_limit, active_limit,
- inactive_limit, clszone_limit;
- struct ckrm_zone *czone;
- u64 temp;
-
- czone = &cls->ckrm_zone[zindex];
- if (ckrm_test_set_shrink(czone))
- continue;
-
- zone->temp_priority = zone->prev_priority;
- zone->prev_priority = sc.priority;
-
- zone_total = zone->nr_active + zone->nr_inactive
- + zone->free_pages;
-
- temp = (u64) cls->pg_limit * zone_total;
- do_div(temp, ckrm_tot_lru_pages);
- zone_limit = (int) temp;
- clszone_limit = (shrink_to * zone_limit) / 100;
- active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
- inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
-
- czone->shrink_active = 0;
- cnt = czone->nr_active + act_credit - active_limit;
- if (cnt > 0) {
- czone->shrink_active = (unsigned long) cnt;
- } else {
- act_credit += cnt;
- }
-
- czone->shrink_inactive = 0;
- cnt = czone->shrink_active + inact_credit +
- (czone->nr_inactive - inactive_limit);
- if (cnt > 0) {
- czone->shrink_inactive = (unsigned long) cnt;
- } else {
- inact_credit += cnt;
- }
-
-
- if (czone->shrink_active || czone->shrink_inactive) {
- sc.nr_to_reclaim = czone->shrink_inactive;
- shrink_ckrmzone(czone, &sc);
- }
- zone->prev_priority = zone->temp_priority;
- zindex++;
- ckrm_clear_shrink(czone);
- }
- check_memclass(cls, "aft_shnk_cls");
-}
-
-static void
-ckrm_shrink_classes(void)
-{
- struct ckrm_mem_res *cls;
-
- spin_lock_irq(&ckrm_mem_lock);
- while (!ckrm_shrink_list_empty()) {
- cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
- shrink_list);
- list_del(&cls->shrink_list);
- cls->flags &= ~MEM_AT_LIMIT;
- spin_unlock_irq(&ckrm_mem_lock);
- ckrm_shrink_class(cls);
- spin_lock_irq(&ckrm_mem_lock);
- }
- spin_unlock_irq(&ckrm_mem_lock);
-}
-
-#else
-#define ckrm_shrink_classes() do { } while(0)
-#endif
-
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
if (!ckrm_shrink_list_empty())
ckrm_shrink_classes();
- else
+ else
balance_pgdat(pgdat, 0);
}
return 0;