/*
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 *
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm_inline.h>
#include <linux/buffer_head.h>	/* for try_to_release_page() */
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/ckrm_mem_inline.h>
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
#ifdef CONFIG_HUGETLB_PAGE

void put_page(struct page *page)
{
	if (unlikely(PageCompound(page))) {
		page = (struct page *)page->private;
		if (put_page_testzero(page)) {
			void (*dtor)(struct page *page);

			dtor = (void (*)(struct page *))page[1].mapping;
			(*dtor)(page);
		}
		return;
	}
	if (!PageReserved(page) && put_page_testzero(page))
		__page_cache_release(page);
}
EXPORT_SYMBOL(put_page);
#endif
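
/*
 * A sketch of what the caller sees (not text from the original source): with
 * CONFIG_HUGETLB_PAGE, a compound page keeps its reference count on the head
 * page, which every sub-page points at through page->private, and the hugetlb
 * code stores the destructor in page[1].mapping when it builds the page.  So
 * a caller may drop its reference through any sub-page:
 *
 *	put_page(p);	p may be a tail page; the final put above resolves it
 *			to the head page and invokes the stored destructor
 */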
/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.  The page still has PageWriteback set, which will pin it.
 *
 * We don't expect many pages to come through here, so don't bother batching
 * things up.
 *
 * To avoid placing the page at the tail of the LRU while PG_writeback is still
 * set, this function will clear PG_writeback before performing the page
 * motion.  Do that inside the lru lock because once PG_writeback is cleared
 * we may not touch the page.
 *
 * Returns zero if it cleared PG_writeback.
 */
int rotate_reclaimable_page(struct page *page)
{
#ifdef CONFIG_CKRM_RES_MEM
	struct ckrm_zone *ckrm_zone = page_ckrmzone(page);
	struct zone *zone = ckrm_zone->zone;
#else
	struct zone *zone = page_zone(page);
#endif
	unsigned long flags;

	spin_lock_irqsave(&zone->lru_lock, flags);
	if (PageLRU(page) && !PageActive(page)) {
		list_del(&page->lru);
#ifdef CONFIG_CKRM_RES_MEM
		list_add_tail(&page->lru, &ckrm_zone->inactive_list);
#else
		list_add_tail(&page->lru, &zone->inactive_list);
#endif
		inc_page_state(pgrotated);
	}
	if (!test_clear_page_writeback(page))
		BUG();
	spin_unlock_irqrestore(&zone->lru_lock, flags);
	return 0;
}
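
/*
 * Expected caller, sketched for orientation (an assumption about the
 * writeback completion path in this kernel series, not code from this file):
 *
 *	if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
 *		if (!test_clear_page_writeback(page))
 *			BUG();
 *	}
 *
 * i.e. if the page was never marked PG_reclaim, or the rotation returned
 * non-zero without clearing PG_writeback, the caller clears it instead.
 */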
/*
 * FIXME: speed this up?
 */
void fastcall activate_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page) && !PageActive(page)) {
		del_page_from_inactive_list(zone, page);
		SetPageActive(page);
		add_page_to_active_list(zone, page);
		inc_page_state(pgactivate);
	}
	spin_unlock_irq(&zone->lru_lock);
}
/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced	->	inactive,referenced
 * inactive,referenced		->	active,unreferenced
 * active,unreferenced		->	active,referenced
 */
void fastcall mark_page_accessed(struct page *page)
{
	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
		activate_page(page);
		ClearPageReferenced(page);
	} else if (!PageReferenced(page)) {
		SetPageReferenced(page);
	}
}

EXPORT_SYMBOL(mark_page_accessed);
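
/*
 * Illustrative behaviour (hypothetical caller, not from this file): the
 * pagecache read path calls this once per access, so two separate accesses
 * walk the state machine documented above:
 *
 *	mark_page_accessed(page);	first access sets PG_referenced
 *	mark_page_accessed(page);	second access calls activate_page()
 *					and clears PG_referenced again
 */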
/*
 * lru_cache_add: add a page to the page lists
 * @page: the page to add
 */
static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
void fastcall lru_cache_add(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);

	page_cache_get(page);
	if (!pagevec_add(pvec, page))
		__pagevec_lru_add(pvec);
	put_cpu_var(lru_add_pvecs);
}

void fastcall lru_cache_add_active(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);

	page_cache_get(page);
	if (!pagevec_add(pvec, page))
		__pagevec_lru_add_active(pvec);
	put_cpu_var(lru_add_active_pvecs);
}
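
/*
 * Note on the batching above: a page handed to lru_cache_add() does not hit
 * the LRU immediately; it sits in the CPU-local pagevec until the vector
 * fills (pagevec_add() returns 0) or lru_add_drain() runs, so zone->lru_lock
 * is taken once per batch rather than once per page.  A minimal caller
 * sketch (hypothetical, not from this file):
 *
 *	lru_cache_add_active(page);	takes its own page reference, so the
 *					caller keeps using the one it holds
 */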
void lru_add_drain(void)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);

	if (pagevec_count(pvec))
		__pagevec_lru_add(pvec);
	pvec = &__get_cpu_var(lru_add_active_pvecs);
	if (pagevec_count(pvec))
		__pagevec_lru_add_active(pvec);
	put_cpu_var(lru_add_pvecs);
}
/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
void fastcall __page_cache_release(struct page *page)
{
	unsigned long flags;
	struct zone *zone = page_zone(page);

	spin_lock_irqsave(&zone->lru_lock, flags);
	if (TestClearPageLRU(page))
		del_page_from_lru(zone, page);
	if (page_count(page) != 0)
		page = NULL;
	spin_unlock_irqrestore(&zone->lru_lock, flags);
	if (page)
		free_hot_page(page);
}
EXPORT_SYMBOL(__page_cache_release);
/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_cache(): we recheck the
 * page count inside the lock to see whether shrink_cache grabbed the page
 * via the LRU.  If it did, give up: shrink_cache will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	struct pagevec pages_to_free;
	struct zone *zone = NULL;

	pagevec_init(&pages_to_free, cold);
	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];
		struct zone *pagezone;

		if (PageReserved(page) || !put_page_testzero(page))
			continue;

		pagezone = page_zone(page);
		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		if (TestClearPageLRU(page))
			del_page_from_lru(zone, page);
		if (page_count(page) == 0) {
			if (!pagevec_add(&pages_to_free, page)) {
				spin_unlock_irq(&zone->lru_lock);
				__pagevec_free(&pages_to_free);
				pagevec_reinit(&pages_to_free);
				zone = NULL;	/* No lock is held */
			}
		}
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);

	pagevec_free(&pages_to_free);
}
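
/*
 * Typical use, as a sketch (hypothetical caller, not from this file): a
 * caller holding an array of page references drops them all in one call,
 * letting the loop above keep zone->lru_lock across runs of same-zone pages
 * instead of relocking for every page:
 *
 *	release_pages(pages, nr_pages, 0);	"0" means the pages are not
 *						expected to be cache-cold
 */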
/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}
/*
 * pagevec_release() for pages which are known to not be on the LRU
 *
 * This function reinitialises the caller's pagevec.
 */
void __pagevec_release_nonlru(struct pagevec *pvec)
{
	int i;
	struct pagevec pages_to_free;

	pagevec_init(&pages_to_free, pvec->cold);
	pages_to_free.cold = pvec->cold;
	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		BUG_ON(PageLRU(page));
		if (put_page_testzero(page))
			pagevec_add(&pages_to_free, page);
	}
	pagevec_free(&pages_to_free);
	pagevec_reinit(pvec);
}
/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void __pagevec_lru_add(struct pagevec *pvec)
{
	int i;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		if (TestSetPageLRU(page))
			BUG();
		add_page_to_inactive_list(zone, page);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}

EXPORT_SYMBOL(__pagevec_lru_add);
void __pagevec_lru_add_active(struct pagevec *pvec)
{
	int i;
	struct zone *zone = NULL;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		if (pagezone != zone) {
			if (zone)
				spin_unlock_irq(&zone->lru_lock);
			zone = pagezone;
			spin_lock_irq(&zone->lru_lock);
		}
		if (TestSetPageLRU(page))
			BUG();
		if (TestSetPageActive(page))
			BUG();
		add_page_to_active_list(zone, page);
	}
	if (zone)
		spin_unlock_irq(&zone->lru_lock);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}
/*
 * Try to drop buffers from the pages in a pagevec
 */
void pagevec_strip(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		if (PagePrivate(page) && !TestSetPageLocked(page)) {
			try_to_release_page(page, 0);
			unlock_page(page);
		}
	}
}
/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec: Where the resulting pages are placed
 * @mapping: The address_space to search
 * @start: The starting page index
 * @nr_pages: The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t start, unsigned nr_pages)
{
	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
	return pagevec_count(pvec);
}

unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t *index, int tag, unsigned nr_pages)
{
	pvec->nr = find_get_pages_tag(mapping, index, tag,
					nr_pages, pvec->pages);
	return pagevec_count(pvec);
}
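
/*
 * Usual calling pattern, sketched (hypothetical caller, not from this file):
 * walk a mapping in PAGEVEC_SIZE chunks and drop the references that
 * pagevec_lookup() took once each batch has been processed:
 *
 *	struct pagevec pvec;
 *	pgoff_t next = 0;
 *	int i;
 *
 *	pagevec_init(&pvec, 0);
 *	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 *		for (i = 0; i < pagevec_count(&pvec); i++) {
 *			struct page *page = pvec.pages[i];
 *
 *			next = page->index + 1;
 *			(work on page here)
 *		}
 *		pagevec_release(&pvec);
 *	}
 */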
#ifdef CONFIG_SMP
/*
 * We tolerate a little inaccuracy to avoid ping-ponging the counter between
 * CPUs
 */
#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)

static DEFINE_PER_CPU(long, committed_space) = 0;

void vm_acct_memory(long pages)
{
	long *local;

	preempt_disable();
	local = &__get_cpu_var(committed_space);
	*local += pages;
	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
		atomic_add(*local, &vm_committed_space);
		*local = 0;
	}
	preempt_enable();
}
EXPORT_SYMBOL(vm_acct_memory);
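
/*
 * How this is used, roughly (an assumption about the callers, e.g. the
 * vm_unacct_memory() wrapper, rather than anything defined in this file):
 * accounting and un-accounting are the same batched call with opposite
 * signs,
 *
 *	vm_acct_memory(len >> PAGE_SHIFT);	charge a new mapping
 *	vm_acct_memory(-(len >> PAGE_SHIFT));	give the charge back
 *
 * and the per-cpu delta is only folded into vm_committed_space once it
 * exceeds ACCT_THRESHOLD in either direction.
 */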
#ifdef CONFIG_HOTPLUG_CPU
static void lru_drain_cache(unsigned int cpu)
{
	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);

	/* CPU is dead, so no locking needed. */
	if (pagevec_count(pvec))
		__pagevec_lru_add(pvec);
	pvec = &per_cpu(lru_add_active_pvecs, cpu);
	if (pagevec_count(pvec))
		__pagevec_lru_add_active(pvec);
}

/* Drop the CPU's cached committed space back into the central pool. */
static int cpu_swap_callback(struct notifier_block *nfb,
			     unsigned long action,
			     void *hcpu)
{
	long *committed;

	committed = &per_cpu(committed_space, (long)hcpu);
	if (action == CPU_DEAD) {
		atomic_add(*committed, &vm_committed_space);
		*committed = 0;
		lru_drain_cache((long)hcpu);
	}
	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif /* CONFIG_SMP */

void percpu_counter_mod(struct percpu_counter *fbc, long amount)
{
	long count;
	long *pcount;
	int cpu = get_cpu();

	pcount = per_cpu_ptr(fbc->counters, cpu);
	count = *pcount + amount;
	if (count >= FBC_BATCH || count <= -FBC_BATCH) {
		spin_lock(&fbc->lock);
		fbc->count += count;
		spin_unlock(&fbc->lock);
		count = 0;
	}
	*pcount = count;
	put_cpu();
}
EXPORT_SYMBOL(percpu_counter_mod);
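
/*
 * Usage sketch (hypothetical counter; assumes the init/read helpers declared
 * in <linux/percpu_counter.h>): updates stay in the per-cpu slot and only
 * touch the shared lock and count once every FBC_BATCH increments per CPU:
 *
 *	static struct percpu_counter nr_foo;
 *
 *	percpu_counter_init(&nr_foo);
 *	percpu_counter_mod(&nr_foo, 1);		usually per-cpu only
 *	total = percpu_counter_read(&nr_foo);	approximate global value
 */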
/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);

	/* Use a smaller cluster for small-memory machines */
	if (megs < 16)
		page_cluster = 2;
	else
		page_cluster = 3;
	/*
	 * Right now other parts of the system mean that we
	 * _really_ don't want to cluster much more
	 */
	hotcpu_notifier(cpu_swap_callback, 0);
}
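
/*
 * For orientation (a property of the swap-in readahead path rather than of
 * this file): page_cluster is used as a power of two, so the values chosen
 * above make swap readahead bring in 1 << page_cluster pages at a time,
 * i.e. 4 pages on sub-16MB machines and 8 pages otherwise.
 */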