This commit was manufactured by cvs2svn to create branch
[linux-2.6.git] / mm / vmscan.c
index 8e3b693..e01d5c9 100644 (file)
 #include <asm/div64.h>
 
 #include <linux/swapops.h>
+#include <linux/ckrm_mem.h>
+
+#ifndef AT_LIMIT_SUPPORT
+#warning "ckrm_at_limit disabled due to problems with memory hog tests -- setting ckrm_shrink_list_empty to true"
+#undef ckrm_shrink_list_empty
+#define ckrm_shrink_list_empty()               (1)
+#endif
 
 /* possible outcome of pageout() */
 typedef enum {
@@ -71,6 +78,9 @@ struct scan_control {
        /* This context's GFP mask */
        unsigned int gfp_mask;
 
+       /* Flag used by CKRM */
+       unsigned int ckrm_flags;
+
        int may_writepage;
 };
 
@@ -85,6 +95,11 @@ struct shrinker {
        long                    nr;     /* objs pending delete */
 };
 
+
+
+void try_to_clip_inodes(void);
+
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -354,6 +369,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                int may_enter_fs;
                int referenced;
 
+               cond_resched();
+
                page = lru_to_page(page_list);
                list_del(&page->lru);
 
@@ -542,19 +559,23 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
 {
        LIST_HEAD(page_list);
        struct pagevec pvec;
-       int max_scan = sc->nr_to_scan;
+       int max_scan = sc->nr_to_scan, nr_pass;
+       unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
 
        pagevec_init(&pvec, 1);
 
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
+redo:
+       ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+       nr_pass = zone->nr_inactive;
        while (max_scan > 0) {
                struct page *page;
                int nr_taken = 0;
                int nr_scan = 0;
                int nr_freed;
 
-               while (nr_scan++ < SWAP_CLUSTER_MAX &&
+               while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX &&
                                !list_empty(&zone->inactive_list)) {
                        page = lru_to_page(&zone->inactive_list);
 
@@ -572,15 +593,25 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                                SetPageLRU(page);
                                list_add(&page->lru, &zone->inactive_list);
                                continue;
+                       } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+                               __put_page(page);
+                               SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+                               list_add_tail(&page->lru, &zone->inactive_list);
+#else
+                               list_add(&page->lru, &zone->inactive_list);
+#endif
+                               continue;
                        }
                        list_add(&page->lru, &page_list);
+                       ckrm_mem_dec_inactive(page);
                        nr_taken++;
                }
                zone->nr_inactive -= nr_taken;
                zone->pages_scanned += nr_taken;
                spin_unlock_irq(&zone->lru_lock);
 
-               if (nr_taken == 0)
+               if ((bit_flag == 0) && (nr_taken == 0))
                        goto done;
 
                max_scan -= nr_scan;
@@ -613,6 +644,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                                spin_lock_irq(&zone->lru_lock);
                        }
                }
+               if (ckrm_flags && (nr_pass <= 0)) {
+                       goto redo;
+               }
        }
        spin_unlock_irq(&zone->lru_lock);
 done:
@@ -652,11 +686,17 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
        long mapped_ratio;
        long distress;
        long swap_tendency;
+       unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
+       int nr_pass;
 
        lru_add_drain();
        pgmoved = 0;
        spin_lock_irq(&zone->lru_lock);
-       while (pgscanned < nr_pages && !list_empty(&zone->active_list)) {
+redo:
+       ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+       nr_pass = zone->nr_active;
+       while (pgscanned < nr_pages && !list_empty(&zone->active_list) &&
+                                               nr_pass) {
                page = lru_to_page(&zone->active_list);
                prefetchw_prev_lru_page(page, &zone->active_list, flags);
                if (!TestClearPageLRU(page))
@@ -672,11 +712,24 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        __put_page(page);
                        SetPageLRU(page);
                        list_add(&page->lru, &zone->active_list);
+                       pgscanned++;
+               } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+                       __put_page(page);
+                       SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+                       list_add_tail(&page->lru, &zone->active_list);
+#else
+                       list_add(&page->lru, &zone->active_list);
+#endif
                } else {
                        list_add(&page->lru, &l_hold);
+                       ckrm_mem_dec_active(page);
                        pgmoved++;
+                       pgscanned++;
+               }
+               if (!--nr_pass && ckrm_flags) {
+                       goto redo;
                }
-               pgscanned++;
        }
        zone->nr_active -= pgmoved;
        spin_unlock_irq(&zone->lru_lock);
@@ -713,6 +766,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                reclaim_mapped = 1;
 
        while (!list_empty(&l_hold)) {
+               cond_resched();
                page = lru_to_page(&l_hold);
                list_del(&page->lru);
                if (page_mapped(page)) {
@@ -750,6 +804,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                if (!TestClearPageActive(page))
                        BUG();
                list_move(&page->lru, &zone->inactive_list);
+               ckrm_mem_inc_inactive(page);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
                        zone->nr_inactive += pgmoved;
@@ -778,6 +833,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        BUG();
                BUG_ON(!PageActive(page));
                list_move(&page->lru, &zone->active_list);
+               ckrm_mem_inc_active(page);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
                        zone->nr_active += pgmoved;
@@ -825,6 +881,7 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
        sc->nr_to_reclaim = SWAP_CLUSTER_MAX;
 
        while (nr_active || nr_inactive) {
+               sc->ckrm_flags = ckrm_setup_reclamation();
                if (nr_active) {
                        sc->nr_to_scan = min(nr_active,
                                        (unsigned long)SWAP_CLUSTER_MAX);
@@ -840,9 +897,118 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
                        if (sc->nr_to_reclaim <= 0)
                                break;
                }
+               ckrm_teardown_reclamation();
+       }
+}
+
+#if defined(CONFIG_CKRM_RES_MEM) && defined(AT_LIMIT_SUPPORT)
+// This function needs to be given more thought.
+// Shrink the class to be at 90% of its limit
+static void
+ckrm_shrink_class(ckrm_mem_res_t *cls)
+{
+       struct scan_control sc;
+       struct zone *zone;
+       int zindex = 0, active_credit = 0, inactive_credit = 0;
+
+       if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically
+               // if it is already set somebody is working on it. so... leave
+               return;
+       }
+       sc.nr_mapped = read_page_state(nr_mapped);
+       sc.nr_scanned = 0;
+       sc.ckrm_flags = ckrm_get_reclaim_flags(cls);
+       sc.nr_reclaimed = 0;
+       sc.priority = 0; // always very high priority
+
+       for_each_zone(zone) {
+               int zone_total, zone_limit, active_limit, inactive_limit;
+               int active_over, inactive_over;
+               unsigned long nr_active, nr_inactive;
+               u64 temp;
+
+               zone->temp_priority = zone->prev_priority;
+               zone->prev_priority = sc.priority;
+
+               zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
+
+               temp = (u64) cls->pg_limit * zone_total;
+               do_div(temp, ckrm_tot_lru_pages);
+               zone_limit = (int) temp;
+               active_limit = (6 * zone_limit) / 10; // 60% in active list
+               inactive_limit = (3 * zone_limit) / 10; // 30% in inactive list
+
+               active_over = cls->nr_active[zindex] - active_limit + active_credit;
+               inactive_over = active_over +
+                               (cls->nr_inactive[zindex] - inactive_limit) + inactive_credit;
+
+               if (active_over > 0) {
+                       zone->nr_scan_active += active_over + 1;
+                       nr_active = zone->nr_scan_active;
+                       active_credit = 0;
+               } else {
+                       active_credit += active_over;
+                       nr_active = 0;
+               }
+
+               if (inactive_over > 0) {
+                       zone->nr_scan_inactive += inactive_over;
+                       nr_inactive = zone->nr_scan_inactive;
+                       inactive_credit = 0;
+               } else {
+                       inactive_credit += inactive_over;
+                       nr_inactive = 0;
+               }
+               while (nr_active || nr_inactive) {
+                       if (nr_active) {
+                               sc.nr_to_scan = min(nr_active,
+                                               (unsigned long)SWAP_CLUSTER_MAX);
+                               nr_active -= sc.nr_to_scan;
+                               refill_inactive_zone(zone, &sc);
+                       }
+       
+                       if (nr_inactive) {
+                               sc.nr_to_scan = min(nr_inactive,
+                                               (unsigned long)SWAP_CLUSTER_MAX);
+                               nr_inactive -= sc.nr_to_scan;
+                               shrink_cache(zone, &sc);
+                               if (sc.nr_to_reclaim <= 0)
+                                       break;
+                       }
+               }
+               zone->prev_priority = zone->temp_priority;
+               zindex++;
        }
+       ckrm_clear_shrink(cls);
 }
 
+static void
+ckrm_shrink_classes(void)
+{
+       ckrm_mem_res_t *cls;
+
+       spin_lock(&ckrm_mem_lock);
+       while (!ckrm_shrink_list_empty()) {
+               cls =  list_entry(ckrm_shrink_list.next, ckrm_mem_res_t,
+                               shrink_list);
+               spin_unlock(&ckrm_mem_lock);
+               ckrm_shrink_class(cls);
+               spin_lock(&ckrm_mem_lock);
+               list_del(&cls->shrink_list);
+               cls->flags &= ~MEM_AT_LIMIT;
+       }
+       spin_unlock(&ckrm_mem_lock);
+}
+
+#else
+
+#if defined(CONFIG_CKRM_RES_MEM) && !defined(AT_LIMIT_SUPPORT)
+#warning "disabling ckrm_at_limit -- setting ckrm_shrink_classes to a no-op"
+#endif
+
+#define ckrm_shrink_classes()  do { } while(0)
+#endif
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1147,7 +1313,11 @@ static int kswapd(void *p)
                prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
                schedule();
                finish_wait(&pgdat->kswapd_wait, &wait);
+               try_to_clip_inodes();           
 
+               if (!ckrm_shrink_list_empty())
+                       ckrm_shrink_classes();
+               else
                balance_pgdat(pgdat, 0);
        }
        return 0;
@@ -1158,7 +1328,7 @@ static int kswapd(void *p)
  */
 void wakeup_kswapd(struct zone *zone)
 {
-       if (zone->free_pages > zone->pages_low)
+       if ((zone->free_pages > zone->pages_low) && ckrm_shrink_list_empty())
                return;
        if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
                return;