#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
-#include <linux/ckrm_mem.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
nr_taken++;
}
zone->nr_inactive -= nr_taken;
- ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
+ ckrm_zone_dec_inactive(ckrm_zone, nr_taken);
spin_unlock_irq(&zone->lru_lock);
if (nr_taken == 0)
BUG();
list_del(&page->lru);
if (PageActive(page)) {
- ckrm_zone_add_active(ckrm_zone, 1);
+ ckrm_zone_inc_active(ckrm_zone, 1);
zone->nr_active++;
list_add(&page->lru, active_list);
} else {
- ckrm_zone_add_inactive(ckrm_zone, 1);
+ ckrm_zone_inc_inactive(ckrm_zone, 1);
zone->nr_inactive++;
list_add(&page->lru, inactive_list);
}
}
zone->pages_scanned += pgscanned;
zone->nr_active -= pgmoved;
- ckrm_zone_sub_active(ckrm_zone, pgmoved);
+ ckrm_zone_dec_active(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
/*
list_move(&page->lru, inactive_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
+ ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
zone->nr_inactive += pgmoved;
- ckrm_zone_add_inactive(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pgdeactivate += pgmoved;
pgmoved = 0;
spin_lock_irq(&zone->lru_lock);
}
}
+ ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
zone->nr_inactive += pgmoved;
- ckrm_zone_add_inactive(ckrm_zone, pgmoved);
pgdeactivate += pgmoved;
if (buffer_heads_over_limit) {
spin_unlock_irq(&zone->lru_lock);
list_move(&page->lru, active_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
+ ckrm_zone_inc_active(ckrm_zone, pgmoved);
zone->nr_active += pgmoved;
- ckrm_zone_add_active(ckrm_zone, pgmoved);
pgmoved = 0;
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
spin_lock_irq(&zone->lru_lock);
}
}
+ ckrm_zone_inc_active(ckrm_zone, pgmoved);
zone->nr_active += pgmoved;
- ckrm_zone_add_active(ckrm_zone, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
}
#ifdef CONFIG_CKRM_RES_MEM
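+/*
+ * Weigh how far this class's czone is over its share of the zone.
+ * The class guarantee is prorated by the zone's fraction of all LRU
+ * pages; pages used beyond that prorated guarantee become the weight,
+ * with the excess recorded in active_over/inactive_over on a rough
+ * 2/3 active, 1/3 inactive split of the guarantee.
+ */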
+static int
+shrink_weight(struct ckrm_zone *czone)
+{
+ u64 temp;
+ struct zone *zone = czone->zone;
+ struct ckrm_mem_res *cls = czone->memcls;
+ int zone_usage, zone_guar, zone_total, guar, ret, cnt;
+
+ zone_usage = czone->nr_active + czone->nr_inactive;
+ czone->active_over = czone->inactive_over = 0;
+
+ if (zone_usage < SWAP_CLUSTER_MAX * 4)
+ return 0;
+
+ if (cls->pg_guar == CKRM_SHARE_DONTCARE) {
+		// no guarantee for this class; use its share of the implicit guarantee
+ guar = cls->impl_guar / cls->nr_dontcare;
+ } else {
+ guar = cls->pg_unused / cls->nr_dontcare;
+ }
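+	/* prorate the class guarantee by this zone's share of all LRU pages */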
+ zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
+ temp = (u64) guar * zone_total;
+ do_div(temp, ckrm_tot_lru_pages);
+ zone_guar = (int) temp;
+
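+	/* only report an excess worth at least one swap cluster */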
+ ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
+ (zone_usage - zone_guar) : 0;
+ if (ret) {
+ cnt = czone->nr_active - (2 * zone_guar / 3);
+ if (cnt > 0)
+ czone->active_over = cnt;
+ cnt = czone->active_over + czone->nr_inactive
+ - zone_guar / 3;
+ if (cnt > 0)
+ czone->inactive_over = cnt;
+ }
+ return ret;
+}
+
static void
shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
{
break;
}
}
+
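+	/* back off if too many pages are already under writeback */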
+ throttle_vm_writeout();
}
}
-/* FIXME: This function needs to be given more thought. */
+/* insert an entry into the list, keeping it sorted in descending order */
static void
-ckrm_shrink_class(struct ckrm_mem_res *cls)
+list_add_sort(struct list_head *entry, struct list_head *head)
{
- struct scan_control sc;
- struct zone *zone;
- int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
-
- sc.nr_mapped = read_page_state(nr_mapped);
- sc.nr_scanned = 0;
- sc.nr_reclaimed = 0;
- sc.priority = 0; // always very high priority
-
- for_each_zone(zone) {
- int zone_total, zone_limit, active_limit,
- inactive_limit, clszone_limit;
- struct ckrm_zone *czone;
- u64 temp;
-
- czone = &cls->ckrm_zone[zindex];
- if (ckrm_test_set_shrink(czone))
- continue;
-
- zone->temp_priority = zone->prev_priority;
- zone->prev_priority = sc.priority;
-
- zone_total = zone->nr_active + zone->nr_inactive
- + zone->free_pages;
-
- temp = (u64) cls->pg_limit * zone_total;
- do_div(temp, ckrm_tot_lru_pages);
- zone_limit = (int) temp;
- clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
- active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
- inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
-
- czone->shrink_active = 0;
- cnt = czone->nr_active + act_credit - active_limit;
- if (cnt > 0) {
- czone->shrink_active = (unsigned long) cnt;
- act_credit = 0;
- } else {
- act_credit += cnt;
+ struct ckrm_zone *czone, *new =
+ list_entry(entry, struct ckrm_zone, victim_list);
+	struct list_head *pos = head->next;
+
+ while (pos != head) {
+ czone = list_entry(pos, struct ckrm_zone, victim_list);
+ if (new->shrink_weight > czone->shrink_weight) {
+ __list_add(entry, pos->prev, pos);
+ return;
}
+ pos = pos->next;
+ }
+ list_add_tail(entry, head);
+}
- czone->shrink_inactive = 0;
- cnt = czone->shrink_active + inact_credit +
- (czone->nr_inactive - inactive_limit);
- if (cnt > 0) {
- czone->shrink_inactive = (unsigned long) cnt;
- inact_credit = 0;
- } else {
- inact_credit += cnt;
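+
+/*
+ * Spread the requested active/inactive shrink counts over the victim
+ * list (already sorted by descending shrink_weight), then drop any
+ * czone that ended up with nothing to shrink.
+ */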
+static void
+shrink_choose_victims(struct list_head *victims,
+ unsigned long nr_active, unsigned long nr_inactive)
+{
+ unsigned long nr;
+	struct ckrm_zone *czone;
+ struct list_head *pos, *next;
+
+ pos = victims->next;
+ while ((pos != victims) && (nr_active || nr_inactive)) {
+ czone = list_entry(pos, struct ckrm_zone, victim_list);
+
+ if (nr_active && czone->active_over) {
+ nr = min(nr_active, czone->active_over);
+ czone->shrink_active += nr;
+ czone->active_over -= nr;
+ nr_active -= nr;
}
-
- if (czone->shrink_active || czone->shrink_inactive) {
- sc.nr_to_reclaim = czone->shrink_inactive;
- shrink_ckrmzone(czone, &sc);
+ if (nr_inactive && czone->inactive_over) {
+ nr = min(nr_inactive, czone->inactive_over);
+ czone->shrink_inactive += nr;
+ czone->inactive_over -= nr;
+ nr_inactive -= nr;
}
- zone->prev_priority = zone->temp_priority;
- zindex++;
- ckrm_clear_shrink(czone);
+ pos = pos->next;
}
+
+ pos = victims->next;
+ while (pos != victims) {
+ czone = list_entry(pos, struct ckrm_zone, victim_list);
+ next = pos->next;
+ if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
+ list_del_init(pos);
+ ckrm_clear_shrink(czone);
+ }
+ pos = next;
+ }
}
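+
+/*
+ * Build this zone's victim list.  With at most one class, the root
+ * class's czone absorbs the whole target; otherwise every overweight
+ * czone is queued in descending shrink_weight order and the targets
+ * are spread over them.
+ */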
static void
-ckrm_shrink_classes(void)
+shrink_get_victims(struct zone *zone, unsigned long nr_active,
+ unsigned long nr_inactive, struct list_head *victims)
{
struct ckrm_mem_res *cls;
+ struct ckrm_zone *czone;
+ int zoneindex = zone_idx(zone);
+
+ if (ckrm_nr_mem_classes <= 1) {
+ if (ckrm_mem_root_class) {
+ czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
+ if (!ckrm_test_set_shrink(czone)) {
+ list_add(&czone->victim_list, victims);
+ czone->shrink_active = nr_active;
+ czone->shrink_inactive = nr_inactive;
+ }
+ }
+ return;
+ }
+ spin_lock_irq(&ckrm_mem_lock);
+ list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
+ czone = cls->ckrm_zone + zoneindex;
+ if (ckrm_test_set_shrink(czone))
+ continue;
- spin_lock(&ckrm_mem_lock);
- while (!ckrm_shrink_list_empty()) {
- cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
- shrink_list);
- list_del(&cls->shrink_list);
- cls->flags &= ~CLS_AT_LIMIT;
- spin_unlock(&ckrm_mem_lock);
- ckrm_shrink_class(cls);
- spin_lock(&ckrm_mem_lock);
+ czone->shrink_active = 0;
+ czone->shrink_inactive = 0;
+ czone->shrink_weight = shrink_weight(czone);
+ if (czone->shrink_weight) {
+ list_add_sort(&czone->victim_list, victims);
+ } else {
+ ckrm_clear_shrink(czone);
+ }
+ }
+ shrink_choose_victims(victims, nr_active, nr_inactive);
+ spin_unlock_irq(&ckrm_mem_lock);
-	}
- spin_unlock(&ckrm_mem_lock);
}
-
-#else
-#define ckrm_shrink_classes() do { } while(0)
-#endif
+#endif /* CONFIG_CKRM_RES_MEM */
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
czone = list_entry(pos, struct ckrm_zone, victim_list);
next = pos->next;
list_del_init(pos);
+ ckrm_clear_shrink(czone);
sc->nr_to_reclaim = czone->shrink_inactive;
shrink_ckrmzone(czone, sc);
- ckrm_clear_shrink(czone);
pos = next;
}
}
#endif
}
+#ifdef CONFIG_CKRM_RES_MEM
+// FIXME: this function needs to be given more thought.
+// Shrink the class down to shrink_to% of its limit.
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+ struct scan_control sc;
+ struct zone *zone;
+ int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+ int shrink_to = ckrm_mem_get_shrink_to();
+
+ sc.nr_mapped = read_page_state(nr_mapped);
+ sc.nr_scanned = 0;
+ sc.nr_reclaimed = 0;
+ sc.priority = 0; // always very high priority
+
+ check_memclass(cls, "bef_shnk_cls");
+ for_each_zone(zone) {
+ int zone_total, zone_limit, active_limit,
+ inactive_limit, clszone_limit;
+ struct ckrm_zone *czone;
+ u64 temp;
+
+ czone = &cls->ckrm_zone[zindex];
+ if (ckrm_test_set_shrink(czone))
+ continue;
+
+ zone->temp_priority = zone->prev_priority;
+ zone->prev_priority = sc.priority;
+
+ zone_total = zone->nr_active + zone->nr_inactive
+ + zone->free_pages;
+
+ temp = (u64) cls->pg_limit * zone_total;
+ do_div(temp, ckrm_tot_lru_pages);
+ zone_limit = (int) temp;
+ clszone_limit = (shrink_to * zone_limit) / 100;
+		active_limit = (2 * clszone_limit) / 3; // two thirds on the active list
+		inactive_limit = clszone_limit / 3; // one third on the inactive list
+
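+		/*
+		 * A zone under its limit leaves negative slack in
+		 * act_credit/inact_credit, trimming what later zones
+		 * are asked to shrink.
+		 */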
+ czone->shrink_active = 0;
+ cnt = czone->nr_active + act_credit - active_limit;
+		if (cnt > 0) {
+			czone->shrink_active = (unsigned long) cnt;
+			act_credit = 0;
+		} else {
+			act_credit += cnt;
+		}
+
+ czone->shrink_inactive = 0;
+ cnt = czone->shrink_active + inact_credit +
+ (czone->nr_inactive - inactive_limit);
+		if (cnt > 0) {
+			czone->shrink_inactive = (unsigned long) cnt;
+			inact_credit = 0;
+		} else {
+			inact_credit += cnt;
+		}
+
+ if (czone->shrink_active || czone->shrink_inactive) {
+ sc.nr_to_reclaim = czone->shrink_inactive;
+ shrink_ckrmzone(czone, &sc);
+ }
+ zone->prev_priority = zone->temp_priority;
+ zindex++;
+ ckrm_clear_shrink(czone);
+ }
+ check_memclass(cls, "aft_shnk_cls");
+}
+
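+/*
+ * Drain ckrm_shrink_list: take each queued class off the list, clear
+ * its MEM_AT_LIMIT flag and shrink it back toward its limit, dropping
+ * ckrm_mem_lock around the actual reclaim.
+ */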
+static void
+ckrm_shrink_classes(void)
+{
+ struct ckrm_mem_res *cls;
+
+ spin_lock_irq(&ckrm_mem_lock);
+ while (!ckrm_shrink_list_empty()) {
+ cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+ shrink_list);
+ list_del(&cls->shrink_list);
+ cls->flags &= ~MEM_AT_LIMIT;
+ spin_unlock_irq(&ckrm_mem_lock);
+ ckrm_shrink_class(cls);
+ spin_lock_irq(&ckrm_mem_lock);
+ }
+ spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes() do { } while(0)
+#endif
+
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
if (!ckrm_shrink_list_empty())
ckrm_shrink_classes();
- else
+ else
balance_pgdat(pgdat, 0);
}
return 0;