X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=0908b4e1539fefed993356903bd77f072a6c2428;hb=041b7df03818bb535b04201c39a6b290bf2f36f0;hp=e5f0b091936f4fe37b10098b46f3b3def081b976;hpb=a7f82aceea1d136c4b94c862f44c3ab1a50bb4e2;p=linux-2.6.git

diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5f0b0919..0908b4e15 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
 #include 
 #include 
+#include 
 
 /*
  * From 0 .. 100. Higher means more swappy.
  */
@@ -329,6 +330,9 @@ struct scan_control {
 	/* This context's GFP mask */
 	unsigned int gfp_mask;
 
+	/* Flag used by CKRM */
+	unsigned int ckrm_flags;
+
 	int may_writepage;
 };
 
@@ -539,19 +543,23 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
-	int max_scan = sc->nr_to_scan;
+	int max_scan = sc->nr_to_scan, nr_pass;
+	unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
 
 	pagevec_init(&pvec, 1);
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
+redo:
+	ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+	nr_pass = zone->nr_inactive;
 	while (max_scan > 0) {
 		struct page *page;
 		int nr_taken = 0;
 		int nr_scan = 0;
 		int nr_freed;
 
-		while (nr_scan++ < SWAP_CLUSTER_MAX &&
+		while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX &&
 				!list_empty(&zone->inactive_list)) {
 			page = lru_to_page(&zone->inactive_list);
 
@@ -569,15 +577,25 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
 				SetPageLRU(page);
 				list_add(&page->lru, &zone->inactive_list);
 				continue;
+			} else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+				__put_page(page);
+				SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+				list_add_tail(&page->lru, &zone->inactive_list);
+#else
+				list_add(&page->lru, &zone->inactive_list);
+#endif
+				continue;
 			}
 			list_add(&page->lru, &page_list);
+			ckrm_mem_dec_inactive(page);
 			nr_taken++;
 		}
 		zone->nr_inactive -= nr_taken;
 		zone->pages_scanned += nr_taken;
 		spin_unlock_irq(&zone->lru_lock);
 
-		if (nr_taken == 0)
+		if ((bit_flag == 0) && (nr_taken == 0))
 			goto done;
 
 		max_scan -= nr_scan;
@@ -609,6 +627,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
 				spin_lock_irq(&zone->lru_lock);
 			}
 		}
+		if (ckrm_flags && (nr_pass <= 0)) {
+			goto redo;
+		}
 	}
 	spin_unlock_irq(&zone->lru_lock);
done:
@@ -648,10 +669,15 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	long mapped_ratio;
 	long distress;
 	long swap_tendency;
+	unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
+	int nr_pass;
 
 	lru_add_drain();
 	pgmoved = 0;
 	spin_lock_irq(&zone->lru_lock);
+redo:
+	ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+	nr_pass = zone->nr_active;
 	while (pgscanned < nr_pages && !list_empty(&zone->active_list)) {
 		page = lru_to_page(&zone->active_list);
 		prefetchw_prev_lru_page(page, &zone->active_list, flags);
@@ -668,11 +694,24 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 			__put_page(page);
 			SetPageLRU(page);
 			list_add(&page->lru, &zone->active_list);
+			pgscanned++;
+		} else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+			__put_page(page);
+			SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+			list_add_tail(&page->lru, &zone->active_list);
+#else
+			list_add(&page->lru, &zone->active_list);
+#endif
 		} else {
 			list_add(&page->lru, &l_hold);
+			ckrm_mem_dec_active(page);
 			pgmoved++;
-		}
 		pgscanned++;
+		}
+		if (ckrm_flags && !--nr_pass) {
+			goto redo;
+		}
 	}
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
@@ -746,6 +785,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 		if (!TestClearPageActive(page))
 			BUG();
 		list_move(&page->lru, &zone->inactive_list);
+		ckrm_mem_inc_inactive(page);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_inactive += pgmoved;
@@ -774,6 +814,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 			BUG();
 		BUG_ON(!PageActive(page));
 		list_move(&page->lru, &zone->active_list);
+		ckrm_mem_inc_active(page);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_active += pgmoved;
@@ -826,6 +867,7 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
 		scan_active = (unsigned long)tmp;
 	}
 
+	sc->ckrm_flags = ckrm_setup_reclamation();
 	atomic_add(scan_active + 1, &zone->nr_scan_active);
 	count = atomic_read(&zone->nr_scan_active);
 	if (count >= SWAP_CLUSTER_MAX) {
@@ -841,8 +883,101 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
 		sc->nr_to_scan = count;
 		shrink_cache(zone, sc);
 	}
+	ckrm_teardown_reclamation();
 }
 
+#ifdef CONFIG_CKRM_RES_MEM
+// This function needs to be given more thought.
+static void
+ckrm_shrink_class(ckrm_mem_res_t *cls)
+{
+	struct scan_control sc;
+	struct zone *zone;
+	int zindex = 0, active_credit = 0, inactive_credit = 0;
+
+	if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically
+		// if it is already set somebody is working on it. so... leave
+		return;
+	}
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.ckrm_flags = ckrm_get_reclaim_flags(cls);
+	sc.nr_reclaimed = 0;
+	sc.priority = 0; // always very high priority
+
+	for_each_zone(zone) {
+		int zone_total, zone_limit, active_limit, inactive_limit;
+		int active_over, inactive_over, count;
+		u64 temp;
+
+		zone->temp_priority = zone->prev_priority;
+		zone->prev_priority = sc.priority;
+
+		zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
+
+		temp = (u64) cls->pg_limit * zone_total;
+		do_div(temp, ckrm_tot_lru_pages);
+		zone_limit = (int) temp;
+		active_limit = (6 * zone_limit) / 10; // 2/3rd in active list
+		inactive_limit = (3 * zone_limit) / 10; // 1/3rd in inactive list
+
+		active_over = cls->nr_active[zindex] - active_limit + active_credit;
+		inactive_over = active_over +
+				(cls->nr_inactive[zindex] - inactive_limit) + inactive_credit;
+
+		if (active_over > 0) {
+			atomic_add(active_over + 1, &zone->nr_scan_active);
+			count = atomic_read(&zone->nr_scan_active);
+			if (count >= SWAP_CLUSTER_MAX) {
+				atomic_set(&zone->nr_scan_active, 0);
+				sc.nr_to_scan = count;
+				refill_inactive_zone(zone, &sc);
+			}
+			active_credit = 0;
+		} else {
+			active_credit = active_over;
+		}
+
+		if (inactive_over > 0) {
+			atomic_add(inactive_over, &zone->nr_scan_inactive);
+			count = atomic_read(&zone->nr_scan_inactive);
+			if (count >= SWAP_CLUSTER_MAX) {
+				atomic_set(&zone->nr_scan_inactive, 0);
+				sc.nr_to_scan = count;
+				shrink_cache(zone, &sc);
+			}
+			inactive_credit = 0;
+		} else {
+			inactive_credit = inactive_over;
+		}
+		zone->prev_priority = zone->temp_priority;
+		zindex++;
+	}
+	ckrm_clear_shrink(cls);
+}
+
+static void
+ckrm_shrink_classes(void)
+{
+	ckrm_mem_res_t *cls;
+
+	spin_lock(&ckrm_mem_lock);
+	while (!ckrm_shrink_list_empty()) {
+		cls = list_entry(ckrm_shrink_list.next, ckrm_mem_res_t,
+				shrink_list);
+		spin_unlock(&ckrm_mem_lock);
+		ckrm_shrink_class(cls);
+		spin_lock(&ckrm_mem_lock);
+		list_del(&cls->shrink_list);
+		cls->flags &= ~MEM_NEAR_LIMIT;
+	}
+	spin_unlock(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes() do { } while(0)
+#endif
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1137,6 +1272,9 @@ int kswapd(void *p)
 		schedule();
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
+		if (!ckrm_shrink_list_empty())
+			ckrm_shrink_classes();
+		else
 		balance_pgdat(pgdat, 0);
 	}
 }
@@ -1146,7 +1284,7 @@ int kswapd(void *p)
  */
 void wakeup_kswapd(struct zone *zone)
 {
-	if (zone->free_pages > zone->pages_low)
+	if ((zone->free_pages > zone->pages_low) && ckrm_shrink_list_empty())
 		return;
 	if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
 		return;