From: Marc Fiuczynski Date: Fri, 24 Sep 2004 20:37:00 +0000 (+0000) Subject: CKRM e16 memory controller. X-Git-Tag: before-enable-kexec-patch~62 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=ed1ab912d8b6715ea455cb4c206388241a4da942;p=linux-2.6.git CKRM e16 memory controller. This contains a fix I made to make it work with initrd. A variant of this fix will be incorporated in the next release of the CKRM memory controller. --- diff --git a/fs/exec.c b/fs/exec.c index bca37d6c0..90580ec70 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -558,6 +559,18 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); task_unlock(tsk); arch_pick_mmap_layout(mm); +#ifdef CONFIG_CKRM_RES_MEM + if (old_mm) { + spin_lock(&old_mm->peertask_lock); + list_del(&tsk->mm_peers); + ckrm_mem_evaluate_mm(old_mm); + spin_unlock(&old_mm->peertask_lock); + } + spin_lock(&mm->peertask_lock); + list_add_tail(&tsk->mm_peers, &mm->tasklist); + ckrm_mem_evaluate_mm(mm); + spin_unlock(&mm->peertask_lock); +#endif if (old_mm) { if (active_mm != old_mm) BUG(); mmput(old_mm); diff --git a/include/linux/ckrm_mem_inline.h b/include/linux/ckrm_mem_inline.h index 0eb4e49c0..a34679a0d 100644 --- a/include/linux/ckrm_mem_inline.h +++ b/include/linux/ckrm_mem_inline.h @@ -56,6 +56,10 @@ ckrm_mem_share_compare(ckrm_mem_res_t *a, ckrm_mem_res_t *b) return -(b != NULL) ; if (b == NULL) return 0; + if (a->pg_guar == CKRM_SHARE_DONTCARE) + return 1; + if (b->pg_guar == CKRM_SHARE_DONTCARE) + return -1; return (a->pg_unused - b->pg_unused); } @@ -69,34 +73,38 @@ mem_class_get(ckrm_mem_res_t *cls) static inline void mem_class_put(ckrm_mem_res_t *cls) { + if (cls && atomic_dec_and_test(&(cls->nr_users)) ) { printk("freeing memclass %p of \n", cls, cls->core->name); + BUG_ON(ckrm_memclass_valid(cls)); //kfree(cls); } } -static inline int +static inline void incr_use_count(ckrm_mem_res_t *cls, int borrow) { - int over_limit; - atomic_inc(&cls->pg_total); - over_limit = (atomic_read(&cls->pg_total) > ((9 * cls->pg_limit) / 10)); if (borrow) cls->pg_lent++; - if ((cls->pg_guar != CKRM_SHARE_DONTCARE) && + if ((cls->pg_guar == CKRM_SHARE_DONTCARE) || (atomic_read(&cls->pg_total) > cls->pg_unused)) { ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent, mem_rcbs.resid, ckrm_mem_res_t); if (parcls) { - over_limit |= incr_use_count(parcls, 1); + incr_use_count(parcls, 1); cls->pg_borrowed++; - return over_limit; } + } else { + atomic_inc(&ckrm_mem_real_count); } - atomic_inc(&ckrm_mem_real_count); - return over_limit; + if ((cls->pg_limit != CKRM_SHARE_DONTCARE) && + (atomic_read(&cls->pg_total) >= cls->pg_limit) && + ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT)) { + ckrm_at_limit(cls); + } + return; } static inline void @@ -159,10 +167,26 @@ ckrm_clear_pages_class(struct page *pages, int numpages) } static inline void -ckrm_change_page_class(struct page *page, ckrm_mem_res_t *cls) +ckrm_change_page_class(struct page *page, ckrm_mem_res_t *newcls) { + ckrm_mem_res_t *oldcls = page_class(page); + + if (!newcls || oldcls == newcls) + return; + ckrm_clear_page_class(page); - ckrm_set_page_class(page, cls); + ckrm_set_page_class(page, newcls); + if (test_bit(PG_ckrm_account, &page->flags)) { + decr_use_count(oldcls, 0); + incr_use_count(newcls, 0); + if (PageActive(page)) { + oldcls->nr_active[page_zonenum(page)]--; + newcls->nr_active[page_zonenum(page)]++; + } else { + oldcls->nr_inactive[page_zonenum(page)]--; + newcls->nr_inactive[page_zonenum(page)]++; + 
} + } } static inline void @@ -178,11 +202,16 @@ ckrm_change_pages_class(struct page *pages, int numpages, static inline void ckrm_mem_inc_active(struct page *page) { - ckrm_mem_res_t *cls = page_class(page); - BUG_ON(cls == NULL); - cls->nr_active[page_zonenum(page)]++; - if (incr_use_count(cls, 0)) { - ckrm_near_limit(cls); + ckrm_mem_res_t *cls = page_class(page), *curcls; + if (likely(cls != NULL)) { + BUG_ON(test_bit(PG_ckrm_account, &page->flags)); + if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) { + cls = curcls; + ckrm_change_page_class(page, cls); + } + cls->nr_active[page_zonenum(page)]++; + incr_use_count(cls, 0); + set_bit(PG_ckrm_account, &page->flags); } } @@ -190,20 +219,27 @@ static inline void ckrm_mem_dec_active(struct page *page) { ckrm_mem_res_t *cls = page_class(page); - BUG_ON(cls == NULL); - cls->nr_active[page_zonenum(page)]--; - decr_use_count(cls, 0); + if (likely(cls != NULL)) { + BUG_ON(!test_bit(PG_ckrm_account, &page->flags)); + cls->nr_active[page_zonenum(page)]--; + decr_use_count(cls, 0); + clear_bit(PG_ckrm_account, &page->flags); + } } static inline void ckrm_mem_inc_inactive(struct page *page) { - ckrm_mem_res_t *cls = page_class(page); - BUG_ON(cls == NULL); - cls->nr_inactive[page_zonenum(page)]++; - if (incr_use_count(cls, 0) && - ((cls->flags & MEM_NEAR_LIMIT) != MEM_NEAR_LIMIT)) { - ckrm_near_limit(cls); + ckrm_mem_res_t *cls = page_class(page), *curcls; + if (likely(cls != NULL)) { + BUG_ON(test_bit(PG_ckrm_account, &page->flags)); + if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) { + cls = curcls; + ckrm_change_page_class(page, cls); + } + cls->nr_inactive[page_zonenum(page)]++; + incr_use_count(cls, 0); + set_bit(PG_ckrm_account, &page->flags); } } @@ -211,9 +247,12 @@ static inline void ckrm_mem_dec_inactive(struct page *page) { ckrm_mem_res_t *cls = page_class(page); - BUG_ON(cls == NULL); - cls->nr_inactive[page_zonenum(page)]--; - decr_use_count(cls, 0); + if (likely(cls != NULL)) { + BUG_ON(!test_bit(PG_ckrm_account, &page->flags)); + cls->nr_inactive[page_zonenum(page)]--; + decr_use_count(cls, 0); + clear_bit(PG_ckrm_account, &page->flags); + } } static inline int @@ -232,7 +271,13 @@ ckrm_class_limit_ok(ckrm_mem_res_t *cls) if ((mem_rcbs.resid == -1) || !cls) { return 1; } - return (atomic_read(&cls->pg_total) <= (11 * cls->pg_limit) / 10); + if (cls->pg_limit == CKRM_SHARE_DONTCARE) { + ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent, + mem_rcbs.resid, ckrm_mem_res_t); + return (!parcls ?: ckrm_class_limit_ok(parcls)); + } else { + return (atomic_read(&cls->pg_total) <= (11 * cls->pg_limit) / 10); + } } #else // !CONFIG_CKRM_RES_MEM diff --git a/include/linux/mm.h b/include/linux/mm.h index af2555f60..3fb18934a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -231,6 +231,9 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. 
highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef CONFIG_CKRM_RES_MEM + void *memclass; +#endif // CONFIG_CKRM_RES_MEM }; /* diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 47762ca69..5edb739b4 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -1,9 +1,11 @@ +#include static inline void add_page_to_active_list(struct zone *zone, struct page *page) { list_add(&page->lru, &zone->active_list); zone->nr_active++; + ckrm_mem_inc_active(page); } static inline void @@ -11,6 +13,7 @@ add_page_to_inactive_list(struct zone *zone, struct page *page) { list_add(&page->lru, &zone->inactive_list); zone->nr_inactive++; + ckrm_mem_inc_inactive(page); } static inline void @@ -18,6 +21,7 @@ del_page_from_active_list(struct zone *zone, struct page *page) { list_del(&page->lru); zone->nr_active--; + ckrm_mem_dec_active(page); } static inline void @@ -25,6 +29,7 @@ del_page_from_inactive_list(struct zone *zone, struct page *page) { list_del(&page->lru); zone->nr_inactive--; + ckrm_mem_dec_inactive(page); } static inline void @@ -34,7 +39,9 @@ del_page_from_lru(struct zone *zone, struct page *page) if (PageActive(page)) { ClearPageActive(page); zone->nr_active--; + ckrm_mem_dec_active(page); } else { zone->nr_inactive--; + ckrm_mem_dec_inactive(page); } } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c6f5063f0..c70f46a4e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -77,6 +77,7 @@ #define PG_compound 19 /* Part of a compound page */ #define PG_anon 20 /* Anonymous: anon_vma in mapping */ +#define PG_ckrm_account 21 /* This page is accounted by CKRM */ /* diff --git a/include/linux/sched.h b/include/linux/sched.h index ee1bd330d..98f7a1eba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -264,6 +264,11 @@ struct mm_struct { struct kioctx *ioctx_list; struct kioctx default_kioctx; +#ifdef CONFIG_CKRM_RES_MEM + struct ckrm_mem_res *memclass; + struct list_head tasklist; /* list of all tasks sharing this address space */ + spinlock_t peertask_lock; /* protect above tasklist */ +#endif }; extern int mmlist_nr; @@ -591,8 +596,10 @@ struct task_struct { struct ckrm_cpu_class *cpu_class; #endif #endif // CONFIG_CKRM_TYPE_TASKCLASS +#ifdef CONFIG_CKRM_RES_MEM + struct list_head mm_peers; // list of tasks using same mm_struct +#endif // CONFIG_CKRM_RES_MEM #endif // CONFIG_CKRM - struct task_delay_info delays; }; diff --git a/init/Kconfig b/init/Kconfig index 26615b43a..da1b24f0e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -203,6 +203,26 @@ config CKRM_RES_BLKIO Say N if unsure, Y to use the feature. +config CKRM_RES_MEM + bool "Class based physical memory controller" + default y + depends on CKRM + help + Provide the basic support for collecting physical memory usage information + among classes. Say Y if you want to know the memory usage of each class. + +config CKRM_MEM_LRUORDER_CHANGE + bool "Change the LRU ordering of scanned pages" + default n + depends on CKRM_RES_MEM + help + While trying to free pages, by default (n), scanned pages are left where they + are found if they belong to a relatively under-used class. In this case the + LRU ordering of the memory subsystem is left intact. If this option is chosen, + then the scanned pages are moved to the tail of the list (active or inactive). + Changing this to yes reduces the checking overhead but violates the approximate + LRU order that is maintained by the paging subsystem.
+ config CKRM_TYPE_SOCKETCLASS bool "Class Manager for socket groups" depends on CKRM diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index 8f5e2fbf1..32b576b9b 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -11,3 +11,4 @@ endif obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_laq.o obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o + obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o diff --git a/kernel/exit.c b/kernel/exit.c index 2f136029c..60075cbb3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -523,6 +524,12 @@ static inline void __exit_mm(struct task_struct * tsk) task_lock(tsk); tsk->mm = NULL; up_read(&mm->mmap_sem); +#ifdef CONFIG_CKRM_RES_MEM + spin_lock(&mm->peertask_lock); + list_del_init(&tsk->mm_peers); + ckrm_mem_evaluate_mm(mm); + spin_unlock(&mm->peertask_lock); +#endif enter_lazy_tlb(mm, current); task_unlock(tsk); mmput(mm); diff --git a/kernel/fork.c b/kernel/fork.c index 144311e8c..195394433 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ckrm_cb_newtask(tsk); /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); +#ifdef CONFIG_CKRM_RES_MEM + INIT_LIST_HEAD(&tsk->mm_peers); +#endif return tsk; } @@ -423,6 +427,10 @@ static struct mm_struct * mm_init(struct mm_struct * mm) mm->ioctx_list = NULL; mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->free_area_cache = TASK_UNMAPPED_BASE; +#ifdef CONFIG_CKRM_RES_MEM + INIT_LIST_HEAD(&mm->tasklist); + mm->peertask_lock = SPIN_LOCK_UNLOCKED; +#endif if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; @@ -444,6 +452,10 @@ struct mm_struct * mm_alloc(void) if (mm) { memset(mm, 0, sizeof(*mm)); mm = mm_init(mm); +#ifdef CONFIG_CKRM_RES_MEM + mm->memclass = GET_MEM_CLASS(current); + mem_class_get(mm->memclass); +#endif } return mm; } @@ -459,6 +471,13 @@ void fastcall __mmdrop(struct mm_struct *mm) mm_free_pgd(mm); destroy_context(mm); clr_vx_info(&mm->mm_vx_info); +#ifdef CONFIG_CKRM_RES_MEM + /* class can be null and mm's tasklist can be empty here */ + if (mm->memclass) { + mem_class_put(mm->memclass); + mm->memclass = NULL; + } +#endif free_mm(mm); } @@ -588,6 +607,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) good_mm: tsk->mm = mm; tsk->active_mm = mm; + ckrm_init_mm_to_task(mm, tsk); return 0; free_pt: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 152299c39..675b061b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -276,6 +277,7 @@ free_pages_bulk(struct zone *zone, int count, /* have to delete it as __free_pages_bulk list manipulates */ list_del(&page->lru); __free_pages_bulk(page, base, zone, area, order); + ckrm_clear_page_class(page); ret++; } spin_unlock_irqrestore(&zone->lock, flags); @@ -622,6 +624,10 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, might_sleep_if(wait); + if (!ckrm_class_limit_ok((GET_MEM_CLASS(current)))) { + return NULL; + } + zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ if (zones[0] == NULL) /* no zones in the zonelist */ return NULL; @@ -751,6 +757,7 @@ nopage: return NULL; got_pg: kernel_map_pages(page, 1 << order, 1); + ckrm_set_pages_class(page, 1 << order, GET_MEM_CLASS(current)); 
return page; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 95e02701a..fa5a5e795 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -37,6 +37,7 @@ #include #include +#include /* possible outcome of pageout() */ typedef enum { @@ -71,6 +72,9 @@ struct scan_control { /* This context's GFP mask */ unsigned int gfp_mask; + /* Flag used by CKRM */ + unsigned int ckrm_flags; + int may_writepage; }; @@ -549,19 +553,23 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) { LIST_HEAD(page_list); struct pagevec pvec; - int max_scan = sc->nr_to_scan; + int max_scan = sc->nr_to_scan, nr_pass; + unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; pagevec_init(&pvec, 1); lru_add_drain(); spin_lock_irq(&zone->lru_lock); +redo: + ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); + nr_pass = zone->nr_inactive; while (max_scan > 0) { struct page *page; int nr_taken = 0; int nr_scan = 0; int nr_freed; - while (nr_scan++ < SWAP_CLUSTER_MAX && + while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX && !list_empty(&zone->inactive_list)) { page = lru_to_page(&zone->inactive_list); @@ -579,15 +587,25 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) SetPageLRU(page); list_add(&page->lru, &zone->inactive_list); continue; + } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { + __put_page(page); + SetPageLRU(page); +#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE + list_add_tail(&page->lru, &zone->inactive_list); +#else + list_add(&page->lru, &zone->inactive_list); +#endif + continue; } list_add(&page->lru, &page_list); + ckrm_mem_dec_inactive(page); nr_taken++; } zone->nr_inactive -= nr_taken; zone->pages_scanned += nr_taken; spin_unlock_irq(&zone->lru_lock); - if (nr_taken == 0) + if ((bit_flag == 0) && (nr_taken == 0)) goto done; max_scan -= nr_scan; @@ -620,6 +638,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) spin_lock_irq(&zone->lru_lock); } } + if (ckrm_flags && (nr_pass <= 0)) { + goto redo; + } } spin_unlock_irq(&zone->lru_lock); done: @@ -659,11 +680,17 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) long mapped_ratio; long distress; long swap_tendency; + unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; + int nr_pass; lru_add_drain(); pgmoved = 0; spin_lock_irq(&zone->lru_lock); - while (pgscanned < nr_pages && !list_empty(&zone->active_list)) { +redo: + ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); + nr_pass = zone->nr_active; + while (pgscanned < nr_pages && !list_empty(&zone->active_list) && + nr_pass) { page = lru_to_page(&zone->active_list); prefetchw_prev_lru_page(page, &zone->active_list, flags); if (!TestClearPageLRU(page)) @@ -679,11 +706,24 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) __put_page(page); SetPageLRU(page); list_add(&page->lru, &zone->active_list); + pgscanned++; + } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { + __put_page(page); + SetPageLRU(page); +#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE + list_add_tail(&page->lru, &zone->active_list); +#else + list_add(&page->lru, &zone->active_list); +#endif } else { list_add(&page->lru, &l_hold); + ckrm_mem_dec_active(page); pgmoved++; - } pgscanned++; + } + if (!--nr_pass && ckrm_flags) { + goto redo; + } } zone->nr_active -= pgmoved; spin_unlock_irq(&zone->lru_lock); @@ -758,6 +798,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) if (!TestClearPageActive(page)) BUG(); list_move(&page->lru, &zone->inactive_list); + ckrm_mem_inc_inactive(page); pgmoved++; if (!pagevec_add(&pvec, page)) { zone->nr_inactive += pgmoved; 
@@ -786,6 +827,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) BUG(); BUG_ON(!PageActive(page)); list_move(&page->lru, &zone->active_list); + ckrm_mem_inc_active(page); pgmoved++; if (!pagevec_add(&pvec, page)) { zone->nr_active += pgmoved; @@ -833,6 +875,7 @@ shrink_zone(struct zone *zone, struct scan_control *sc) sc->nr_to_reclaim = SWAP_CLUSTER_MAX; while (nr_active || nr_inactive) { + sc->ckrm_flags = ckrm_setup_reclamation(); if (nr_active) { sc->nr_to_scan = min(nr_active, (unsigned long)SWAP_CLUSTER_MAX); @@ -848,9 +891,113 @@ shrink_zone(struct zone *zone, struct scan_control *sc) if (sc->nr_to_reclaim <= 0) break; } + ckrm_teardown_reclamation(); + } +} + +#ifdef CONFIG_CKRM_RES_MEM +// This function needs to be given more thought. +// Shrink the class to be at 90% of its limit +static void +ckrm_shrink_class(ckrm_mem_res_t *cls) +{ + struct scan_control sc; + struct zone *zone; + int zindex = 0, active_credit = 0, inactive_credit = 0; + + if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically + // if it is already set somebody is working on it. so... leave + return; + } + sc.nr_mapped = read_page_state(nr_mapped); + sc.nr_scanned = 0; + sc.ckrm_flags = ckrm_get_reclaim_flags(cls); + sc.nr_reclaimed = 0; + sc.priority = 0; // always very high priority + + for_each_zone(zone) { + int zone_total, zone_limit, active_limit, inactive_limit; + int active_over, inactive_over; + unsigned long nr_active, nr_inactive; + u64 temp; + + zone->temp_priority = zone->prev_priority; + zone->prev_priority = sc.priority; + + zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages; + + temp = (u64) cls->pg_limit * zone_total; + do_div(temp, ckrm_tot_lru_pages); + zone_limit = (int) temp; + active_limit = (6 * zone_limit) / 10; // 2/3rd in active list + inactive_limit = (3 * zone_limit) / 10; // 1/3rd in inactive list + + active_over = cls->nr_active[zindex] - active_limit + active_credit; + inactive_over = active_over + + (cls->nr_inactive[zindex] - inactive_limit) + inactive_credit; + + if (active_over > 0) { + zone->nr_scan_active += active_over + 1; + nr_active = zone->nr_scan_active; + active_credit = 0; + } else { + active_credit += active_over; + nr_active = 0; + } + + if (inactive_over > 0) { + zone->nr_scan_inactive += inactive_over; + nr_inactive = zone->nr_scan_inactive; + inactive_credit = 0; + } else { + inactive_credit += inactive_over; + nr_inactive = 0; + } + while (nr_active || nr_inactive) { + if (nr_active) { + sc.nr_to_scan = min(nr_active, + (unsigned long)SWAP_CLUSTER_MAX); + nr_active -= sc.nr_to_scan; + refill_inactive_zone(zone, &sc); + } + + if (nr_inactive) { + sc.nr_to_scan = min(nr_inactive, + (unsigned long)SWAP_CLUSTER_MAX); + nr_inactive -= sc.nr_to_scan; + shrink_cache(zone, &sc); + if (sc.nr_to_reclaim <= 0) + break; + } + } + zone->prev_priority = zone->temp_priority; + zindex++; } + ckrm_clear_shrink(cls); } +static void +ckrm_shrink_classes(void) +{ + ckrm_mem_res_t *cls; + + spin_lock(&ckrm_mem_lock); + while (!ckrm_shrink_list_empty()) { + cls = list_entry(ckrm_shrink_list.next, ckrm_mem_res_t, + shrink_list); + spin_unlock(&ckrm_mem_lock); + ckrm_shrink_class(cls); + spin_lock(&ckrm_mem_lock); + list_del(&cls->shrink_list); + cls->flags &= ~MEM_AT_LIMIT; + } + spin_unlock(&ckrm_mem_lock); +} + +#else +#define ckrm_shrink_classes() do { } while(0) +#endif + /* * This is the direct reclaim path, for page-allocating processes. 
We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -1157,6 +1304,9 @@ static int kswapd(void *p) finish_wait(&pgdat->kswapd_wait, &wait); try_to_clip_inodes(); + if (!ckrm_shrink_list_empty()) + ckrm_shrink_classes(); + else balance_pgdat(pgdat, 0); } return 0; @@ -1167,7 +1317,7 @@ static int kswapd(void *p) */ void wakeup_kswapd(struct zone *zone) { - if (zone->free_pages > zone->pages_low) + if ((zone->free_pages > zone->pages_low) && ckrm_shrink_list_empty()) return; if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait)) return;