X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fpage-writeback.c;fp=mm%2Fpage-writeback.c;h=945559fb63d208bb5c10543aece7b117e84d3e97;hb=64ba3f394c830ec48a1c31b53dcae312c56f1604;hp=d42361aa06e6a5209bb014666c4e1beace8f007e;hpb=be1e6109ac94a859551f8e1774eb9a8469fe055c;p=linux-2.6.git diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d42361aa0..945559fb6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -29,7 +29,6 @@ #include #include #include -#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -73,14 +72,15 @@ int dirty_background_ratio = 10; int vm_dirty_ratio = 40; /* - * The interval between `kupdate'-style writebacks, in jiffies + * The interval between `kupdate'-style writebacks, in centiseconds + * (hundredths of a second) */ -int dirty_writeback_interval = 5 * HZ; +int dirty_writeback_centisecs = 5 * 100; /* - * The longest number of jiffies for which data is allowed to remain dirty + * The longest number of centiseconds for which data is allowed to remain dirty */ -int dirty_expire_interval = 30 * HZ; +int dirty_expire_centisecs = 30 * 100; /* * Flag that makes the machine dump writes/reads and block dirtyings. @@ -88,8 +88,7 @@ int dirty_expire_interval = 30 * HZ; int block_dump; /* - * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: - * a full sync is triggered after this time elapses without any disk activity. + * Flag that puts the machine in "laptop mode". */ int laptop_mode; @@ -100,6 +99,22 @@ EXPORT_SYMBOL(laptop_mode); static void background_writeout(unsigned long _min_pages); +struct writeback_state +{ + unsigned long nr_dirty; + unsigned long nr_unstable; + unsigned long nr_mapped; + unsigned long nr_writeback; +}; + +static void get_writeback_state(struct writeback_state *wbs) +{ + wbs->nr_dirty = read_page_state(nr_dirty); + wbs->nr_unstable = read_page_state(nr_unstable); + wbs->nr_mapped = read_page_state(nr_mapped); + wbs->nr_writeback = read_page_state(nr_writeback); +} + /* * Work out the current dirty-memory clamping and background writeout * thresholds. @@ -118,8 +133,8 @@ static void background_writeout(unsigned long _min_pages); * clamping level. 
*/ static void -get_dirty_limits(long *pbackground, long *pdirty, - struct address_space *mapping) +get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty, + struct address_space *mapping) { int background_ratio; /* Percentages */ int dirty_ratio; @@ -129,6 +144,8 @@ get_dirty_limits(long *pbackground, long *pdirty, unsigned long available_memory = total_pages; struct task_struct *tsk; + get_writeback_state(wbs); + #ifdef CONFIG_HIGHMEM /* * If this mapping can only allocate from low memory, @@ -139,9 +156,7 @@ get_dirty_limits(long *pbackground, long *pdirty, #endif - unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + - global_page_state(NR_ANON_PAGES)) * 100) / - total_pages; + unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages; dirty_ratio = vm_dirty_ratio; if (dirty_ratio > unmapped_ratio / 2) @@ -174,6 +189,7 @@ get_dirty_limits(long *pbackground, long *pdirty, */ static void balance_dirty_pages(struct address_space *mapping) { + struct writeback_state wbs; long nr_reclaimable; long background_thresh; long dirty_thresh; @@ -188,15 +204,13 @@ static void balance_dirty_pages(struct address_space *mapping) .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, - .range_cyclic = 1, }; - get_dirty_limits(&background_thresh, &dirty_thresh, mapping); - nr_reclaimable = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - if (nr_reclaimable + global_page_state(NR_WRITEBACK) <= - dirty_thresh) - break; + get_dirty_limits(&wbs, &background_thresh, + &dirty_thresh, mapping); + nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; + if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) + break; if (!dirty_exceeded) dirty_exceeded = 1; @@ -209,14 +223,11 @@ static void balance_dirty_pages(struct address_space *mapping) */ if (nr_reclaimable) { writeback_inodes(&wbc); - get_dirty_limits(&background_thresh, - &dirty_thresh, mapping); - nr_reclaimable = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - if (nr_reclaimable + - global_page_state(NR_WRITEBACK) - <= dirty_thresh) - break; + get_dirty_limits(&wbs, &background_thresh, + &dirty_thresh, mapping); + nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; + if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) + break; pages_written += write_chunk - wbc.nr_to_write; if (pages_written >= write_chunk) break; /* We've done our duty */ @@ -224,9 +235,8 @@ static void balance_dirty_pages(struct address_space *mapping) blk_congestion_wait(WRITE, HZ/10); } - if (nr_reclaimable + global_page_state(NR_WRITEBACK) - <= dirty_thresh && dirty_exceeded) - dirty_exceeded = 0; + if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded) + dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* pdflush is already working this queue */ @@ -244,20 +254,9 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } -void set_page_dirty_balance(struct page *page) -{ - if (set_page_dirty(page)) { - struct address_space *mapping = page_mapping(page); - - if (mapping) - balance_dirty_pages_ratelimited(mapping); - } -} - /** - * balance_dirty_pages_ratelimited_nr - balance dirty memory state + * balance_dirty_pages_ratelimited - balance dirty memory state * @mapping: address_space which was dirtied - * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. 
The function will periodically check the system's @@ -268,12 +267,10 @@ void set_page_dirty_balance(struct page *page) * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */ -void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, - unsigned long nr_pages_dirtied) +void balance_dirty_pages_ratelimited(struct address_space *mapping) { - static DEFINE_PER_CPU(unsigned long, ratelimits) = 0; - unsigned long ratelimit; - unsigned long *p; + static DEFINE_PER_CPU(int, ratelimits) = 0; + long ratelimit; ratelimit = ratelimit_pages; if (dirty_exceeded) @@ -283,26 +280,24 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */ - preempt_disable(); - p = &__get_cpu_var(ratelimits); - *p += nr_pages_dirtied; - if (unlikely(*p >= ratelimit)) { - *p = 0; - preempt_enable(); + if (get_cpu_var(ratelimits)++ >= ratelimit) { + __get_cpu_var(ratelimits) = 0; + put_cpu_var(ratelimits); balance_dirty_pages(mapping); return; } - preempt_enable(); + put_cpu_var(ratelimits); } -EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr); +EXPORT_SYMBOL(balance_dirty_pages_ratelimited); void throttle_vm_writeout(void) { + struct writeback_state wbs; long background_thresh; long dirty_thresh; for ( ; ; ) { - get_dirty_limits(&background_thresh, &dirty_thresh, NULL); + get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); /* * Boost the allowable dirty threshold a bit for page @@ -310,9 +305,8 @@ void throttle_vm_writeout(void) */ dirty_thresh += dirty_thresh / 10; /* wheeee... */ - if (global_page_state(NR_UNSTABLE_NFS) + - global_page_state(NR_WRITEBACK) <= dirty_thresh) - break; + if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh) + break; blk_congestion_wait(WRITE, HZ/10); } } @@ -331,16 +325,15 @@ static void background_writeout(unsigned long _min_pages) .older_than_this = NULL, .nr_to_write = 0, .nonblocking = 1, - .range_cyclic = 1, }; for ( ; ; ) { + struct writeback_state wbs; long background_thresh; long dirty_thresh; - get_dirty_limits(&background_thresh, &dirty_thresh, NULL); - if (global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS) < background_thresh + get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); + if (wbs.nr_dirty + wbs.nr_unstable < background_thresh && min_pages <= 0) break; wbc.encountered_congestion = 0; @@ -364,9 +357,12 @@ static void background_writeout(unsigned long _min_pages) */ int wakeup_pdflush(long nr_pages) { - if (nr_pages == 0) - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); + if (nr_pages == 0) { + struct writeback_state wbs; + + get_writeback_state(&wbs); + nr_pages = wbs.nr_dirty + wbs.nr_unstable; + } return pdflush_operation(background_writeout, nr_pages); } @@ -384,8 +380,8 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); * just walks the superblock inode list, writing back any inodes which are * older than a specific point in time. * - * Try to run once per dirty_writeback_interval. But if a writeback event - * takes longer than a dirty_writeback_interval interval, then leave a + * Try to run once per dirty_writeback_centisecs. But if a writeback event + * takes longer than a dirty_writeback_centisecs interval, then leave a * one-second gap. * * older_than_this takes precedence over nr_to_write. 
So we'll only write back @@ -397,6 +393,7 @@ static void wb_kupdate(unsigned long arg) unsigned long start_jif; unsigned long next_jif; long nr_to_write; + struct writeback_state wbs; struct writeback_control wbc = { .bdi = NULL, .sync_mode = WB_SYNC_NONE, @@ -404,16 +401,15 @@ static void wb_kupdate(unsigned long arg) .nr_to_write = 0, .nonblocking = 1, .for_kupdate = 1, - .range_cyclic = 1, }; sync_supers(); - oldest_jif = jiffies - dirty_expire_interval; + get_writeback_state(&wbs); + oldest_jif = jiffies - (dirty_expire_centisecs * HZ) / 100; start_jif = jiffies; - next_jif = start_jif + dirty_writeback_interval; - nr_to_write = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS) + + next_jif = start_jif + (dirty_writeback_centisecs * HZ) / 100; + nr_to_write = wbs.nr_dirty + wbs.nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); while (nr_to_write > 0) { wbc.encountered_congestion = 0; @@ -429,7 +425,7 @@ static void wb_kupdate(unsigned long arg) } if (time_before(next_jif, jiffies + HZ)) next_jif = jiffies + HZ; - if (dirty_writeback_interval) + if (dirty_writeback_centisecs) mod_timer(&wb_timer, next_jif); } @@ -439,11 +435,11 @@ static void wb_kupdate(unsigned long arg) int dirty_writeback_centisecs_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos); - if (dirty_writeback_interval) { + proc_dointvec(table, write, file, buffer, length, ppos); + if (dirty_writeback_centisecs) { mod_timer(&wb_timer, - jiffies + dirty_writeback_interval); - } else { + jiffies + (dirty_writeback_centisecs * HZ) / 100); + } else { del_timer(&wb_timer); } return 0; @@ -472,7 +468,7 @@ static void laptop_timer_fn(unsigned long unused) */ void laptop_io_completion(void) { - mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode); + mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode * HZ); } /* @@ -511,14 +507,14 @@ static void set_ratelimit(void) ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE; } -static int __cpuinit +static int ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { set_ratelimit(); return 0; } -static struct notifier_block __cpuinitdata ratelimit_nb = { +static struct notifier_block ratelimit_nb = { .notifier_call = ratelimit_handler, .next = NULL, }; @@ -548,7 +544,7 @@ void __init page_writeback_init(void) if (vm_dirty_ratio <= 0) vm_dirty_ratio = 1; } - mod_timer(&wb_timer, jiffies + dirty_writeback_interval); + mod_timer(&wb_timer, jiffies + (dirty_writeback_centisecs * HZ) / 100); set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } @@ -561,7 +557,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -625,6 +621,8 @@ EXPORT_SYMBOL(write_one_page); */ int __set_page_dirty_nobuffers(struct page *page) { + int ret = 0; + if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; @@ -635,8 +633,7 @@ int __set_page_dirty_nobuffers(struct page *page) if (mapping2) { /* Race with truncate? 
*/ BUG_ON(mapping2 != mapping); if (mapping_cap_account_dirty(mapping)) - __inc_zone_page_state(page, - NR_FILE_DIRTY); + inc_page_state(nr_dirty); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@ -647,9 +644,8 @@ int __set_page_dirty_nobuffers(struct page *page) I_DIRTY_PAGES); } } - return 1; } - return 0; + return ret; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); @@ -679,10 +675,8 @@ int fastcall set_page_dirty(struct page *page) return (*spd)(page); return __set_page_dirty_buffers(page); } - if (!PageDirty(page)) { - if (!TestSetPageDirty(page)) - return 1; - } + if (!PageDirty(page)) + SetPageDirty(page); return 0; } EXPORT_SYMBOL(set_page_dirty); @@ -717,7 +711,6 @@ int test_clear_page_dirty(struct page *page) struct address_space *mapping = page_mapping(page); unsigned long flags; - WARN_ON_ONCE(!PageLocked(page)); if (mapping) { write_lock_irqsave(&mapping->tree_lock, flags); if (TestClearPageDirty(page)) { @@ -725,14 +718,8 @@ int test_clear_page_dirty(struct page *page) page_index(page), PAGECACHE_TAG_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); - /* - * We can continue to use `mapping' here because the - * page is locked, which pins the address_space - */ - if (mapping_cap_account_dirty(mapping)) { - page_mkclean(page); - dec_zone_page_state(page, NR_FILE_DIRTY); - } + if (mapping_cap_account_dirty(mapping)) + dec_page_state(nr_dirty); return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -760,13 +747,10 @@ int clear_page_dirty_for_io(struct page *page) { struct address_space *mapping = page_mapping(page); - WARN_ON_ONCE(!PageLocked(page)); if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) { - page_mkclean(page); - dec_zone_page_state(page, NR_FILE_DIRTY); - } + if (mapping_cap_account_dirty(mapping)) + dec_page_state(nr_dirty); return 1; } return 0;
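For reference, wherever the patch feeds the new centisecond-based sysctls into a timer (wb_kupdate(), dirty_writeback_centisecs_handler(), page_writeback_init()) it uses the inline expression (centisecs * HZ) / 100. The userspace sketch below just exercises that arithmetic; the centisecs_to_jiffies() helper name and the HZ value are assumptions made for illustration and are not part of the patch.

/*
 * Userspace sketch of the centiseconds -> jiffies conversion used in this
 * patch.  HZ is hard-coded here for the example only; in the kernel it is
 * a build-time constant, and no centisecs_to_jiffies() helper exists in
 * the patched file (the expression is written out inline).
 */
#include <stdio.h>

#define HZ 250	/* assumed tick rate for the example */

static unsigned long centisecs_to_jiffies(unsigned long cs)
{
	return (cs * HZ) / 100;	/* same expression the patch uses inline */
}

int main(void)
{
	/* defaults from the patch: 5 s and 30 s, expressed in centiseconds */
	unsigned long dirty_writeback_centisecs = 5 * 100;
	unsigned long dirty_expire_centisecs = 30 * 100;

	printf("writeback interval: %lu jiffies\n",
	       centisecs_to_jiffies(dirty_writeback_centisecs));
	printf("expire interval:    %lu jiffies\n",
	       centisecs_to_jiffies(dirty_expire_centisecs));
	return 0;
}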
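The reworked get_dirty_limits() keeps the existing threshold arithmetic but feeds it from the reintroduced struct writeback_state: dirty_ratio is clamped against the fraction of unmapped memory, background_ratio is kept below dirty_ratio, and both are then scaled by the usable page count. A rough userspace sketch of that arithmetic follows; the total_pages and nr_mapped figures are made-up example inputs, not values from a real system.

/*
 * Sketch of the threshold arithmetic in get_dirty_limits() after this
 * patch, with invented example numbers.
 */
#include <stdio.h>

int main(void)
{
	unsigned long total_pages = 262144;	/* assumed: 1 GiB of 4 KiB pages */
	unsigned long nr_mapped = 131072;	/* assumed: half of memory mapped */
	int vm_dirty_ratio = 40;		/* default from the patch */
	int dirty_background_ratio = 10;	/* default from the patch */

	int unmapped_ratio = 100 - (nr_mapped * 100) / total_pages;

	int dirty_ratio = vm_dirty_ratio;
	if (dirty_ratio > unmapped_ratio / 2)
		dirty_ratio = unmapped_ratio / 2;
	if (dirty_ratio < 5)
		dirty_ratio = 5;

	int background_ratio = dirty_background_ratio;
	if (background_ratio >= dirty_ratio)
		background_ratio = dirty_ratio / 2;

	long background = (background_ratio * (long)total_pages) / 100;
	long dirty = (dirty_ratio * (long)total_pages) / 100;

	printf("unmapped_ratio=%d dirty_ratio=%d background_ratio=%d\n",
	       unmapped_ratio, dirty_ratio, background_ratio);
	printf("background threshold=%ld pages, dirty threshold=%ld pages\n",
	       background, dirty);
	return 0;
}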
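Finally, the patch returns balance_dirty_pages_ratelimited() to a per-CPU counter incremented once per dirtied page, entering the expensive balance_dirty_pages() path only every ratelimit_pages calls. The sketch below mimics that control flow with an ordinary static counter instead of a per-CPU variable (the real code brackets the access with get_cpu_var()/put_cpu_var()), so it illustrates the ratelimiting idea only, not the SMP handling; the ratelimit_pages value is an assumed example.

/*
 * Userspace sketch of the ratelimiting pattern restored by this patch.
 */
#include <stdio.h>

static int ratelimit_pages = 32;	/* assumed chunk size for the example */
static int ratelimits;			/* stands in for the per-CPU counter */

static void balance_dirty_pages(void)
{
	printf("entering the expensive writeback/balancing path\n");
}

/* Called once per newly dirtied page; only every (ratelimit_pages + 1)-th
 * call falls through into balance_dirty_pages(). */
static void balance_dirty_pages_ratelimited(void)
{
	if (ratelimits++ >= ratelimit_pages) {
		ratelimits = 0;
		balance_dirty_pages();
	}
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		balance_dirty_pages_ratelimited();	/* triggers 3 times here */
	return 0;
}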