X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=e7244bc6e4ee79b95453adf33abaf57f9998cd0d;hb=6f71f83334a552167ccbbd42fe5dd979428c89e4;hp=6f7fba5135d74a92f15daeae065818de5de829b2;hpb=8d40237c730b8be87c1b80a5d96b9c603fefa829;p=linux-2.6.git diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f7fba513..e7244bc6e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -39,8 +39,12 @@ #include #include -#include +#ifndef AT_LIMIT_SUPPORT +#warning "ckrm_at_limit disabled due to problems with memory hog tests -- seting ckrm_shrink_list_empty to true" +#undef ckrm_shrink_list_empty +#define ckrm_shrink_list_empty() (1) +#endif /* possible outcome of pageout() */ typedef enum { @@ -75,6 +79,9 @@ struct scan_control { /* This context's GFP mask */ unsigned int gfp_mask; + /* Flag used by CKRM */ + unsigned int ckrm_flags; + int may_writepage; }; @@ -364,6 +371,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) int may_enter_fs; int referenced; + cond_resched(); + page = lru_to_page(page_list); list_del(&page->lru); @@ -372,14 +381,14 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) BUG_ON(PageActive(page)); + if (PageWriteback(page)) + goto keep_locked; + sc->nr_scanned++; /* Double the slab pressure for mapped and swapcache pages */ if (page_mapped(page) || PageSwapCache(page)) sc->nr_scanned++; - if (PageWriteback(page)) - goto keep_locked; - referenced = page_referenced(page, 1, sc->priority <= 0); /* In active use or really unfreeable? Activate it. */ if (referenced && page_mapping_inuse(page)) @@ -538,40 +547,32 @@ keep: * For pagecache intensive workloads, the first loop here is the hottest spot * in the kernel (apart from the copy_*_user functions). */ -#ifdef CONFIG_CKRM_RES_MEM -static void shrink_cache(struct ckrm_zone *ckrm_zone, struct scan_control *sc) -#else static void shrink_cache(struct zone *zone, struct scan_control *sc) -#endif { LIST_HEAD(page_list); struct pagevec pvec; - int max_scan = sc->nr_to_scan; -#ifdef CONFIG_CKRM_RES_MEM - struct zone *zone = ckrm_zone->zone; - struct list_head *inactive_list = &ckrm_zone->inactive_list; - struct list_head *active_list = &ckrm_zone->active_list; -#else - struct list_head *inactive_list = &zone->inactive_list; - struct list_head *active_list = &zone->active_list; -#endif + int max_scan = sc->nr_to_scan, nr_pass; + unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; pagevec_init(&pvec, 1); lru_add_drain(); spin_lock_irq(&zone->lru_lock); +redo: + ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); + nr_pass = zone->nr_inactive; while (max_scan > 0) { struct page *page; int nr_taken = 0; int nr_scan = 0; int nr_freed; - while (nr_scan++ < SWAP_CLUSTER_MAX && - !list_empty(inactive_list)) { - page = lru_to_page(inactive_list); + while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX && + !list_empty(&zone->inactive_list)) { + page = lru_to_page(&zone->inactive_list); prefetchw_prev_lru_page(page, - inactive_list, flags); + &zone->inactive_list, flags); if (!TestClearPageLRU(page)) BUG(); @@ -582,17 +583,27 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) */ __put_page(page); SetPageLRU(page); - list_add(&page->lru, inactive_list); + list_add(&page->lru, &zone->inactive_list); + continue; + } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { + __put_page(page); + SetPageLRU(page); +#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE + list_add_tail(&page->lru, &zone->inactive_list); +#else + list_add(&page->lru, &zone->inactive_list); +#endif continue; } 
list_add(&page->lru, &page_list); + ckrm_mem_dec_inactive(page); nr_taken++; } zone->nr_inactive -= nr_taken; - ckrm_zone_dec_inactive(ckrm_zone, nr_taken); + zone->pages_scanned += nr_taken; spin_unlock_irq(&zone->lru_lock); - if (nr_taken == 0) + if ((bit_flag == 0) && (nr_taken == 0)) goto done; max_scan -= nr_scan; @@ -615,21 +626,19 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) if (TestSetPageLRU(page)) BUG(); list_del(&page->lru); - if (PageActive(page)) { - ckrm_zone_inc_active(ckrm_zone, 1); - zone->nr_active++; - list_add(&page->lru, active_list); - } else { - ckrm_zone_inc_inactive(ckrm_zone, 1); - zone->nr_inactive++; - list_add(&page->lru, inactive_list); - } + if (PageActive(page)) + add_page_to_active_list(zone, page); + else + add_page_to_inactive_list(zone, page); if (!pagevec_add(&pvec, page)) { spin_unlock_irq(&zone->lru_lock); __pagevec_release(&pvec); spin_lock_irq(&zone->lru_lock); } } + if (ckrm_flags && (nr_pass <= 0)) { + goto redo; + } } spin_unlock_irq(&zone->lru_lock); done: @@ -654,11 +663,7 @@ done: * But we had to alter page->flags anyway. */ static void -#ifdef CONFIG_CKRM_RES_MEM -refill_inactive_zone(struct ckrm_zone *ckrm_zone, struct scan_control *sc) -#else refill_inactive_zone(struct zone *zone, struct scan_control *sc) -#endif { int pgmoved; int pgdeactivate = 0; @@ -673,21 +678,19 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) long mapped_ratio; long distress; long swap_tendency; -#ifdef CONFIG_CKRM_RES_MEM - struct zone *zone = ckrm_zone->zone; - struct list_head *active_list = &ckrm_zone->active_list; - struct list_head *inactive_list = &ckrm_zone->inactive_list; -#else - struct list_head *active_list = &zone->active_list; - struct list_head *inactive_list = &zone->inactive_list; -#endif + unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; + int nr_pass; lru_add_drain(); pgmoved = 0; spin_lock_irq(&zone->lru_lock); - while (pgscanned < nr_pages && !list_empty(active_list)) { - page = lru_to_page(active_list); - prefetchw_prev_lru_page(page, active_list, flags); +redo: + ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); + nr_pass = zone->nr_active; + while (pgscanned < nr_pages && !list_empty(&zone->active_list) && + nr_pass) { + page = lru_to_page(&zone->active_list); + prefetchw_prev_lru_page(page, &zone->active_list, flags); if (!TestClearPageLRU(page)) BUG(); list_del(&page->lru); @@ -700,16 +703,27 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) */ __put_page(page); SetPageLRU(page); - list_add(&page->lru, active_list); + list_add(&page->lru, &zone->active_list); + pgscanned++; + } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { + __put_page(page); + SetPageLRU(page); +#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE + list_add_tail(&page->lru, &zone->active_list); +#else + list_add(&page->lru, &zone->active_list); +#endif } else { list_add(&page->lru, &l_hold); + ckrm_mem_dec_active(page); pgmoved++; + pgscanned++; + } + if (!--nr_pass && ckrm_flags) { + goto redo; } - pgscanned++; } - zone->pages_scanned += pgscanned; zone->nr_active -= pgmoved; - ckrm_zone_dec_active(ckrm_zone, pgmoved); spin_unlock_irq(&zone->lru_lock); /* @@ -744,6 +758,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) reclaim_mapped = 1; while (!list_empty(&l_hold)) { + cond_resched(); page = lru_to_page(&l_hold); list_del(&page->lru); if (page_mapped(page)) { @@ -767,10 +782,10 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) BUG(); if (!TestClearPageActive(page)) BUG(); 
- list_move(&page->lru, inactive_list); + list_move(&page->lru, &zone->inactive_list); + ckrm_mem_inc_inactive(page); pgmoved++; if (!pagevec_add(&pvec, page)) { - ckrm_zone_inc_inactive(ckrm_zone, pgmoved); zone->nr_inactive += pgmoved; spin_unlock_irq(&zone->lru_lock); pgdeactivate += pgmoved; @@ -781,7 +796,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) spin_lock_irq(&zone->lru_lock); } } - ckrm_zone_inc_inactive(ckrm_zone, pgmoved); zone->nr_inactive += pgmoved; pgdeactivate += pgmoved; if (buffer_heads_over_limit) { @@ -797,10 +811,10 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) if (TestSetPageLRU(page)) BUG(); BUG_ON(!PageActive(page)); - list_move(&page->lru, active_list); + list_move(&page->lru, &zone->active_list); + ckrm_mem_inc_active(page); pgmoved++; if (!pagevec_add(&pvec, page)) { - ckrm_zone_inc_active(ckrm_zone, pgmoved); zone->nr_active += pgmoved; pgmoved = 0; spin_unlock_irq(&zone->lru_lock); @@ -808,7 +822,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) spin_lock_irq(&zone->lru_lock); } } - ckrm_zone_inc_active(ckrm_zone, pgmoved); zone->nr_active += pgmoved; spin_unlock_irq(&zone->lru_lock); pagevec_release(&pvec); @@ -817,183 +830,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) mod_page_state(pgdeactivate, pgdeactivate); } -#ifdef CONFIG_CKRM_RES_MEM -static int -shrink_weight(struct ckrm_zone *czone) -{ - u64 temp; - struct zone *zone = czone->zone; - struct ckrm_mem_res *cls = czone->memcls; - int zone_usage, zone_guar, zone_total, guar, ret, cnt; - - zone_usage = czone->nr_active + czone->nr_inactive; - czone->active_over = czone->inactive_over = 0; - - if (zone_usage < SWAP_CLUSTER_MAX * 4) - return 0; - - if (cls->pg_guar == CKRM_SHARE_DONTCARE) { - // no guarantee for this class. use implicit guarantee - guar = cls->impl_guar / cls->nr_dontcare; - } else { - guar = cls->pg_unused / cls->nr_dontcare; - } - zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages; - temp = (u64) guar * zone_total; - do_div(temp, ckrm_tot_lru_pages); - zone_guar = (int) temp; - - ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ? 
- (zone_usage - zone_guar) : 0; - if (ret) { - cnt = czone->nr_active - (2 * zone_guar / 3); - if (cnt > 0) - czone->active_over = cnt; - cnt = czone->active_over + czone->nr_inactive - - zone_guar / 3; - if (cnt > 0) - czone->inactive_over = cnt; - } - return ret; -} - -static void -shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc) -{ - while (czone->shrink_active || czone->shrink_inactive) { - if (czone->shrink_active) { - sc->nr_to_scan = min(czone->shrink_active, - (unsigned long)SWAP_CLUSTER_MAX); - czone->shrink_active -= sc->nr_to_scan; - refill_inactive_zone(czone, sc); - } - if (czone->shrink_inactive) { - sc->nr_to_scan = min(czone->shrink_inactive, - (unsigned long)SWAP_CLUSTER_MAX); - czone->shrink_inactive -= sc->nr_to_scan; - shrink_cache(czone, sc); - if (sc->nr_to_reclaim <= 0) { - czone->shrink_active = 0; - czone->shrink_inactive = 0; - break; - } - } - - throttle_vm_writeout(); - } -} - -/* insert an entry to the list and sort decendently*/ -static void -list_add_sort(struct list_head *entry, struct list_head *head) -{ - struct ckrm_zone *czone, *new = - list_entry(entry, struct ckrm_zone, victim_list); - struct list_head* pos = head->next; - - while (pos != head) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - if (new->shrink_weight > czone->shrink_weight) { - __list_add(entry, pos->prev, pos); - return; - } - pos = pos->next; - } - list_add_tail(entry, head); - return; -} - -static void -shrink_choose_victims(struct list_head *victims, - unsigned long nr_active, unsigned long nr_inactive) -{ - unsigned long nr; - struct ckrm_zone* czone; - struct list_head *pos, *next; - - pos = victims->next; - while ((pos != victims) && (nr_active || nr_inactive)) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - - if (nr_active && czone->active_over) { - nr = min(nr_active, czone->active_over); - czone->shrink_active += nr; - czone->active_over -= nr; - nr_active -= nr; - } - - if (nr_inactive && czone->inactive_over) { - nr = min(nr_inactive, czone->inactive_over); - czone->shrink_inactive += nr; - czone->inactive_over -= nr; - nr_inactive -= nr; - } - pos = pos->next; - } - - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - next = pos->next; - if (czone->shrink_active == 0 && czone->shrink_inactive == 0) { - list_del_init(pos); - ckrm_clear_shrink(czone); - } - pos = next; - } - return; -} - -static void -shrink_get_victims(struct zone *zone, unsigned long nr_active, - unsigned long nr_inactive, struct list_head *victims) -{ - struct list_head *pos; - struct ckrm_mem_res *cls; - struct ckrm_zone *czone; - int zoneindex = zone_idx(zone); - - if (ckrm_nr_mem_classes <= 1) { - if (ckrm_mem_root_class) { - czone = ckrm_mem_root_class->ckrm_zone + zoneindex; - if (!ckrm_test_set_shrink(czone)) { - list_add(&czone->victim_list, victims); - czone->shrink_active = nr_active; - czone->shrink_inactive = nr_inactive; - } - } - return; - } - spin_lock_irq(&ckrm_mem_lock); - list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) { - czone = cls->ckrm_zone + zoneindex; - if (ckrm_test_set_shrink(czone)) - continue; - - czone->shrink_active = 0; - czone->shrink_inactive = 0; - czone->shrink_weight = shrink_weight(czone); - if (czone->shrink_weight) { - list_add_sort(&czone->victim_list, victims); - } else { - ckrm_clear_shrink(czone); - } - } - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - pos = pos->next; - } - 
shrink_choose_victims(victims, nr_active, nr_inactive); - spin_unlock_irq(&ckrm_mem_lock); - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - pos = pos->next; - } -} -#endif /* CONFIG_CKRM_RES_MEM */ - /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ @@ -1002,9 +838,6 @@ shrink_zone(struct zone *zone, struct scan_control *sc) { unsigned long nr_active; unsigned long nr_inactive; -#ifdef CONFIG_CKRM_RES_MEM - struct ckrm_zone *czone; -#endif /* * Add one to `nr_to_scan' just to make sure that the kernel will @@ -1026,25 +859,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc) sc->nr_to_reclaim = SWAP_CLUSTER_MAX; -#ifdef CONFIG_CKRM_RES_MEM - if (nr_active || nr_inactive) { - struct list_head *pos, *next; - LIST_HEAD(victims); - - shrink_get_victims(zone, nr_active, nr_inactive, &victims); - pos = victims.next; - while (pos != &victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - next = pos->next; - list_del_init(pos); - ckrm_clear_shrink(czone); - sc->nr_to_reclaim = czone->shrink_inactive; - shrink_ckrmzone(czone, sc); - pos = next; - } - } -#else while (nr_active || nr_inactive) { + sc->ckrm_flags = ckrm_setup_reclamation(); if (nr_active) { sc->nr_to_scan = min(nr_active, (unsigned long)SWAP_CLUSTER_MAX); @@ -1060,98 +876,116 @@ shrink_zone(struct zone *zone, struct scan_control *sc) if (sc->nr_to_reclaim <= 0) break; } + ckrm_teardown_reclamation(); } -#endif } -#ifdef CONFIG_CKRM_RES_MEM +#if defined(CONFIG_CKRM_RES_MEM) && defined(AT_LIMIT_SUPPORT) // This function needs to be given more thought. -// Shrink the class to be at shrink_to%" of its limit +// Shrink the class to be at 90% of its limit static void -ckrm_shrink_class(struct ckrm_mem_res *cls) +ckrm_shrink_class(ckrm_mem_res_t *cls) { struct scan_control sc; struct zone *zone; - int zindex = 0, cnt, act_credit = 0, inact_credit = 0; - int shrink_to = ckrm_mem_get_shrink_to(); + int zindex = 0, active_credit = 0, inactive_credit = 0; + if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically + // if it is already set somebody is working on it. so... 
leave + return; + } sc.nr_mapped = read_page_state(nr_mapped); sc.nr_scanned = 0; + sc.ckrm_flags = ckrm_get_reclaim_flags(cls); sc.nr_reclaimed = 0; sc.priority = 0; // always very high priority - check_memclass(cls, "bef_shnk_cls"); for_each_zone(zone) { - int zone_total, zone_limit, active_limit, - inactive_limit, clszone_limit; - struct ckrm_zone *czone; + int zone_total, zone_limit, active_limit, inactive_limit; + int active_over, inactive_over; + unsigned long nr_active, nr_inactive; u64 temp; - czone = &cls->ckrm_zone[zindex]; - if (ckrm_test_set_shrink(czone)) - continue; - zone->temp_priority = zone->prev_priority; zone->prev_priority = sc.priority; - zone_total = zone->nr_active + zone->nr_inactive - + zone->free_pages; + zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages; temp = (u64) cls->pg_limit * zone_total; do_div(temp, ckrm_tot_lru_pages); zone_limit = (int) temp; - clszone_limit = (shrink_to * zone_limit) / 100; - active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list - inactive_limit = clszone_limit / 3; // 1/3rd in inactive list - - czone->shrink_active = 0; - cnt = czone->nr_active + act_credit - active_limit; - if (cnt > 0) { - czone->shrink_active = (unsigned long) cnt; + active_limit = (6 * zone_limit) / 10; // 2/3rd in active list + inactive_limit = (3 * zone_limit) / 10; // 1/3rd in inactive list + + active_over = cls->nr_active[zindex] - active_limit + active_credit; + inactive_over = active_over + + (cls->nr_inactive[zindex] - inactive_limit) + inactive_credit; + + if (active_over > 0) { + zone->nr_scan_active += active_over + 1; + nr_active = zone->nr_scan_active; + active_credit = 0; } else { - act_credit += cnt; + active_credit += active_over; + nr_active = 0; } - czone->shrink_inactive = 0; - cnt = czone->shrink_active + inact_credit + - (czone->nr_inactive - inactive_limit); - if (cnt > 0) { - czone->shrink_inactive = (unsigned long) cnt; + if (inactive_over > 0) { + zone->nr_scan_inactive += inactive_over; + nr_inactive = zone->nr_scan_inactive; + inactive_credit = 0; } else { - inact_credit += cnt; + inactive_credit += inactive_over; + nr_inactive = 0; } - - - if (czone->shrink_active || czone->shrink_inactive) { - sc.nr_to_reclaim = czone->shrink_inactive; - shrink_ckrmzone(czone, &sc); + while (nr_active || nr_inactive) { + if (nr_active) { + sc.nr_to_scan = min(nr_active, + (unsigned long)SWAP_CLUSTER_MAX); + nr_active -= sc.nr_to_scan; + refill_inactive_zone(zone, &sc); + } + + if (nr_inactive) { + sc.nr_to_scan = min(nr_inactive, + (unsigned long)SWAP_CLUSTER_MAX); + nr_inactive -= sc.nr_to_scan; + shrink_cache(zone, &sc); + if (sc.nr_to_reclaim <= 0) + break; + } } zone->prev_priority = zone->temp_priority; zindex++; - ckrm_clear_shrink(czone); } - check_memclass(cls, "aft_shnk_cls"); + ckrm_clear_shrink(cls); } static void ckrm_shrink_classes(void) { - struct ckrm_mem_res *cls; + ckrm_mem_res_t *cls; - spin_lock_irq(&ckrm_mem_lock); + spin_lock(&ckrm_mem_lock); while (!ckrm_shrink_list_empty()) { - cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res, + cls = list_entry(ckrm_shrink_list.next, ckrm_mem_res_t, shrink_list); + spin_unlock(&ckrm_mem_lock); + ckrm_shrink_class(cls); + spin_lock(&ckrm_mem_lock); list_del(&cls->shrink_list); cls->flags &= ~MEM_AT_LIMIT; - spin_unlock_irq(&ckrm_mem_lock); - ckrm_shrink_class(cls); - spin_lock_irq(&ckrm_mem_lock); } - spin_unlock_irq(&ckrm_mem_lock); + spin_unlock(&ckrm_mem_lock); + throttle_vm_writeout(); } #else + +#if defined(CONFIG_CKRM_RES_MEM) && 
!defined(AT_LIMIT_SUPPORT) +#warning "disabling ckrm_at_limit -- setting ckrm_shrink_classes to noop " +#endif + #define ckrm_shrink_classes() do { } while(0) #endif @@ -1391,7 +1225,6 @@ scan: shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_reclaimed += sc.nr_reclaimed; - total_scanned += sc.nr_scanned; if (zone->all_unreclaimable) continue; if (zone->pages_scanned >= (zone->nr_active + @@ -1493,7 +1326,7 @@ static int kswapd(void *p) if (!ckrm_shrink_list_empty()) ckrm_shrink_classes(); else - balance_pgdat(pgdat, 0); + balance_pgdat(pgdat, 0); } return 0; } @@ -1570,7 +1403,7 @@ static int __init kswapd_init(void) swap_setup(); for_each_pgdat(pgdat) pgdat->kswapd - = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); + = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); total_memory = nr_free_pagecache_pages(); hotcpu_notifier(cpu_callback, 0); return 0;
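
The hunks above drop the per-class ckrm_zone active/inactive lists and instead filter the global zone LRU lists in place: shrink_zone() fetches a set of reclaim bits via ckrm_setup_reclamation() into sc->ckrm_flags, shrink_cache() and refill_inactive_zone() ask ckrm_kick_page() whether each page should be considered on this pass (rejected pages go straight back onto the LRU, at the tail when CONFIG_CKRM_MEM_LRUORDER_CHANGE is set), and ckrm_shrink_class() turns a class's pg_limit into per-zone active/inactive scan targets. Below is a minimal, self-contained user-space sketch of that per-zone target arithmetic only, assuming the 6/10 and 3/10 limits the code actually uses (its comments still read "2/3rd" and "1/3rd") and the credit carry-over between zones; the standalone struct, the sample numbers and main() are illustrative and not part of the patch.

/*
 * Sketch of the overage arithmetic in ckrm_shrink_class() above.
 * The uint64_t cast stands in for the kernel's u64 temp + do_div().
 */
#include <stdio.h>
#include <stdint.h>

struct zone_sample {
	long nr_active, nr_inactive, free_pages;   /* zone-wide LRU state */
	long cls_active, cls_inactive;             /* this class's pages in the zone */
};

int main(void)
{
	long cls_pg_limit = 20000;      /* class page limit across all zones (hypothetical) */
	long tot_lru_pages = 100000;    /* plays the role of ckrm_tot_lru_pages */
	struct zone_sample zones[2] = {
		{ 30000, 20000, 10000, 9000, 7000 },
		{ 20000, 15000,  5000, 6000, 4000 },
	};
	long active_credit = 0, inactive_credit = 0;

	for (int i = 0; i < 2; i++) {
		struct zone_sample *z = &zones[i];
		long zone_total = z->nr_active + z->nr_inactive + z->free_pages;

		/* this zone's proportional share of the class limit */
		long zone_limit = (long)((uint64_t)cls_pg_limit * zone_total
					 / tot_lru_pages);
		long active_limit   = 6 * zone_limit / 10;  /* ~60% on the active list   */
		long inactive_limit = 3 * zone_limit / 10;  /* ~30% on the inactive list */

		long active_over = z->cls_active - active_limit + active_credit;
		long inactive_over = active_over
			+ (z->cls_inactive - inactive_limit) + inactive_credit;

		/* a deficit in one zone is carried as credit into the next */
		if (active_over > 0) {
			active_credit = 0;
		} else {
			active_credit += active_over;
			active_over = 0;
		}
		if (inactive_over > 0) {
			inactive_credit = 0;
		} else {
			inactive_credit += inactive_over;
			inactive_over = 0;
		}

		printf("zone %d: scan %ld active / %ld inactive pages of this class\n",
		       i, active_over, inactive_over);
	}
	return 0;
}

The credit variables mean that a class sitting below its share in one zone is not needlessly scanned in the next one: the unused headroom is subtracted from the following zone's overage, matching the active_credit/inactive_credit handling in the ckrm_shrink_class() hunk.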