X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=75d7f48b79bba537d522cbd709138d48196d6782;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=624ce500fb169a8f3cc9f275fd26fc0f13dc3612;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 624ce500f..75d7f48b7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -46,7 +46,7 @@ static long ratelimit_pages = 32; static long total_pages; /* The total number of pages in the machine. */ -static int dirty_exceeded; /* Dirty mem may be over limit */ +static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ /* * When balance_dirty_pages decides that the caller needs to perform some @@ -72,15 +72,14 @@ int dirty_background_ratio = 10; int vm_dirty_ratio = 40; /* - * The interval between `kupdate'-style writebacks, in centiseconds - * (hundredths of a second) + * The interval between `kupdate'-style writebacks, in jiffies */ -int dirty_writeback_centisecs = 5 * 100; +int dirty_writeback_interval = 5 * HZ; /* - * The longest number of centiseconds for which data is allowed to remain dirty + * The longest number of jiffies for which data is allowed to remain dirty */ -int dirty_expire_centisecs = 30 * 100; +int dirty_expire_interval = 30 * HZ; /* * Flag that makes the machine dump writes/reads and block dirtyings. @@ -88,7 +87,8 @@ int dirty_expire_centisecs = 30 * 100; int block_dump; /* - * Flag that puts the machine in "laptop mode". + * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: + * a full sync is triggered after this time elapses without any disk activity. */ int laptop_mode; @@ -212,7 +212,8 @@ static void balance_dirty_pages(struct address_space *mapping) if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) break; - dirty_exceeded = 1; + if (!dirty_exceeded) + dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked @@ -234,7 +235,7 @@ static void balance_dirty_pages(struct address_space *mapping) blk_congestion_wait(WRITE, HZ/10); } - if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) + if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded) dirty_exceeded = 0; if (writeback_in_progress(bdi)) @@ -254,8 +255,9 @@ static void balance_dirty_pages(struct address_space *mapping) } /** - * balance_dirty_pages_ratelimited - balance dirty memory state - * @mapping - address_space which was dirtied + * balance_dirty_pages_ratelimited_nr - balance dirty memory state + * @mapping: address_space which was dirtied + * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's @@ -266,10 +268,12 @@ static void balance_dirty_pages(struct address_space *mapping) * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. 
 */
-void balance_dirty_pages_ratelimited(struct address_space *mapping)
+void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
+					unsigned long nr_pages_dirtied)
 {
-	static DEFINE_PER_CPU(int, ratelimits) = 0;
-	long ratelimit;
+	static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
+	unsigned long ratelimit;
+	unsigned long *p;
 
 	ratelimit = ratelimit_pages;
 	if (dirty_exceeded)
@@ -279,15 +283,40 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	 * Check the rate limiting. Also, we do not want to throttle real-time
 	 * tasks in balance_dirty_pages(). Period.
 	 */
-	if (get_cpu_var(ratelimits)++ >= ratelimit) {
-		__get_cpu_var(ratelimits) = 0;
-		put_cpu_var(ratelimits);
+	preempt_disable();
+	p = &__get_cpu_var(ratelimits);
+	*p += nr_pages_dirtied;
+	if (unlikely(*p >= ratelimit)) {
+		*p = 0;
+		preempt_enable();
 		balance_dirty_pages(mapping);
 		return;
 	}
-	put_cpu_var(ratelimits);
+	preempt_enable();
 }
-EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
+EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
+
+void throttle_vm_writeout(void)
+{
+	struct writeback_state wbs;
+	long background_thresh;
+	long dirty_thresh;
+
+	for ( ; ; ) {
+		get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
+
+		/*
+		 * Boost the allowable dirty threshold a bit for page
+		 * allocators so they don't get DoS'ed by heavy writers
+		 */
+		dirty_thresh += dirty_thresh / 10;	/* wheeee... */
+
+		if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh)
+			break;
+		blk_congestion_wait(WRITE, HZ/10);
+	}
+}
+
 
 /*
  * writeback at least _min_pages, and keep writing until the amount of dirty
@@ -332,7 +361,7 @@ static void background_writeout(unsigned long _min_pages)
  * the whole world. Returns 0 if a pdflush thread was dispatched. Returns
  * -1 if all pdflush threads were busy.
  */
-int wakeup_bdflush(long nr_pages)
+int wakeup_pdflush(long nr_pages)
 {
 	if (nr_pages == 0) {
 		struct writeback_state wbs;
@@ -346,10 +375,8 @@ int wakeup_bdflush(long nr_pages)
 static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static struct timer_list wb_timer =
-			TIMER_INITIALIZER(wb_timer_fn, 0, 0);
-static struct timer_list laptop_mode_wb_timer =
-			TIMER_INITIALIZER(laptop_timer_fn, 0, 0);
+static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
+static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
  * Periodic writeback of "old" data.
@@ -359,8 +386,8 @@ static struct timer_list laptop_mode_wb_timer =
  * just walks the superblock inode list, writing back any inodes which are
  * older than a specific point in time.
  *
- * Try to run once per dirty_writeback_centisecs. But if a writeback event
- * takes longer than a dirty_writeback_centisecs interval, then leave a
+ * Try to run once per dirty_writeback_interval. But if a writeback event
+ * takes longer than one dirty_writeback_interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write. So we'll only write back
@@ -385,9 +412,9 @@ static void wb_kupdate(unsigned long arg)
 	sync_supers();
 
 	get_writeback_state(&wbs);
-	oldest_jif = jiffies - (dirty_expire_centisecs * HZ) / 100;
+	oldest_jif = jiffies - dirty_expire_interval;
 	start_jif = jiffies;
-	next_jif = start_jif + (dirty_writeback_centisecs * HZ) / 100;
+	next_jif = start_jif + dirty_writeback_interval;
 	nr_to_write = wbs.nr_dirty + wbs.nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 	while (nr_to_write > 0) {
@@ -404,7 +431,7 @@ static void wb_kupdate(unsigned long arg)
 	}
 	if (time_before(next_jif, jiffies + HZ))
 		next_jif = jiffies + HZ;
-	if (dirty_writeback_centisecs)
+	if (dirty_writeback_interval)
 		mod_timer(&wb_timer, next_jif);
 }
 
@@ -414,11 +441,11 @@ static void wb_kupdate(unsigned long arg)
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
-	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (dirty_writeback_centisecs) {
+	proc_dointvec_userhz_jiffies(table, write, file, buffer, length, ppos);
+	if (dirty_writeback_interval) {
 		mod_timer(&wb_timer,
-			jiffies + (dirty_writeback_centisecs * HZ) / 100);
-	} else {
+			jiffies + dirty_writeback_interval);
+	} else {
 		del_timer(&wb_timer);
 	}
 	return 0;
 }
@@ -447,7 +474,7 @@ static void laptop_timer_fn(unsigned long unused)
 */
 void laptop_io_completion(void)
 {
-	mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode * HZ);
+	mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode);
 }
 
 /*
@@ -523,25 +550,31 @@ void __init page_writeback_init(void)
 		if (vm_dirty_ratio <= 0)
 			vm_dirty_ratio = 1;
 	}
-	mod_timer(&wb_timer, jiffies + (dirty_writeback_centisecs * HZ) / 100);
+	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
 	set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 }
 
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+	int ret;
+
 	if (wbc->nr_to_write <= 0)
 		return 0;
+	wbc->for_writepages = 1;
 	if (mapping->a_ops->writepages)
-		return mapping->a_ops->writepages(mapping, wbc);
-	return generic_writepages(mapping, wbc);
+		ret = mapping->a_ops->writepages(mapping, wbc);
+	else
+		ret = generic_writepages(mapping, wbc);
+	wbc->for_writepages = 0;
+	return ret;
 }
 
 /**
  * write_one_page - write out a single page and optionally wait on I/O
 *
- * @page - the page to write
- * @wait - if true, wait on writeout
+ * @page: the page to write
+ * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
@@ -594,31 +627,30 @@ EXPORT_SYMBOL(write_one_page);
 */
 int __set_page_dirty_nobuffers(struct page *page)
 {
-	int ret = 0;
-
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
 
 		if (mapping) {
-			spin_lock_irq(&mapping->tree_lock);
+			write_lock_irq(&mapping->tree_lock);
 			mapping2 = page_mapping(page);
 			if (mapping2) { /* Race with truncate? */
 				BUG_ON(mapping2 != mapping);
-				if (!mapping->backing_dev_info->memory_backed)
+				if (mapping_cap_account_dirty(mapping))
 					inc_page_state(nr_dirty);
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
 			}
-			spin_unlock_irq(&mapping->tree_lock);
+			write_unlock_irq(&mapping->tree_lock);
 			if (mapping->host) {
 				/* !PageAnon && !swapper_space */
 				__mark_inode_dirty(mapping->host,
 							I_DIRTY_PAGES);
 			}
 		}
+		return 1;
 	}
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
@@ -648,8 +680,10 @@ int fastcall set_page_dirty(struct page *page)
 			return (*spd)(page);
 		return __set_page_dirty_buffers(page);
 	}
-	if (!PageDirty(page))
-		SetPageDirty(page);
+	if (!PageDirty(page)) {
+		if (!TestSetPageDirty(page))
+			return 1;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(set_page_dirty);
@@ -685,17 +719,17 @@ int test_clear_page_dirty(struct page *page)
 	unsigned long flags;
 
 	if (mapping) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		if (TestClearPageDirty(page)) {
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-			spin_unlock_irqrestore(&mapping->tree_lock, flags);
-			if (!mapping->backing_dev_info->memory_backed)
+			write_unlock_irqrestore(&mapping->tree_lock, flags);
+			if (mapping_cap_account_dirty(mapping))
 				dec_page_state(nr_dirty);
 			return 1;
 		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 		return 0;
 	}
 	return TestClearPageDirty(page);
@@ -722,7 +756,7 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
-			if (!mapping->backing_dev_info->memory_backed)
+			if (mapping_cap_account_dirty(mapping))
 				dec_page_state(nr_dirty);
 			return 1;
 		}
@@ -732,30 +766,6 @@ int clear_page_dirty_for_io(struct page *page)
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
-/*
- * Clear a page's dirty flag while ignoring dirty memory accounting
- */
-int __clear_page_dirty(struct page *page)
-{
-	struct address_space *mapping = page_mapping(page);
-
-	if (mapping) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		if (TestClearPageDirty(page)) {
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
-						PAGECACHE_TAG_DIRTY);
-			spin_unlock_irqrestore(&mapping->tree_lock, flags);
-			return 1;
-		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
-		return 0;
-	}
-	return TestClearPageDirty(page);
-}
-
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
@@ -764,13 +774,13 @@ int test_clear_page_writeback(struct page *page)
 	if (mapping) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
@@ -785,7 +795,7 @@ int test_set_page_writeback(struct page *page)
 	if (mapping) {
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		write_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret)
 			radix_tree_tag_set(&mapping->page_tree,
@@ -795,7 +805,7 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		write_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
@@ -813,9 +823,9 @@ int mapping_tagged(struct address_space *mapping, int tag)
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	read_lock_irqsave(&mapping->tree_lock, flags);
 	ret = radix_tree_tagged(&mapping->page_tree, tag);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	read_unlock_irqrestore(&mapping->tree_lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);
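
A few of the changes above lean on patterns worth spelling out. The dirty_exceeded hunks combine two SMP optimizations: __cacheline_aligned_in_smp gives the flag its own cacheline, and both the set and the clear path now test the current value before storing, so the hot balance_dirty_pages() loop only dirties that cacheline on a real 0/1 transition. A generic sketch of the test-before-store pattern (flag and set_flag_once are illustrative names, not code from this patch):

	/* An unconditional store would put this cacheline into Modified
	 * state on every call, forcing every other CPU that polls the
	 * flag to re-fetch it.  Reading first keeps the line Shared
	 * whenever the value is already correct. */
	static int flag __cacheline_aligned_in_smp;

	static inline void set_flag_once(int value)
	{
		if (flag != value)	/* cheap shared read */
			flag = value;	/* write only on transition */
	}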
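Renaming the export to balance_dirty_pages_ratelimited_nr() does not orphan the many single-page callers: kernels of this vintage keep the old name as a one-line inline wrapper (in mainline it lives in include/linux/writeback.h). A minimal sketch of that arrangement:

	/* Convenience wrapper: callers that dirty one page at a time keep
	 * using the old name; only batched dirtiers pass a count. */
	static inline void
	balance_dirty_pages_ratelimited(struct address_space *mapping)
	{
		balance_dirty_pages_ratelimited_nr(mapping, 1);
	}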
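The centisecs-to-jiffies conversion leaves the /proc ABI alone: /proc/sys/vm/dirty_writeback_centisecs and dirty_expire_centisecs still read and write centiseconds, because proc_dointvec_userhz_jiffies() converts between USER_HZ units (100 per second on most architectures, i.e. centiseconds) and the jiffies now stored in dirty_writeback_interval and dirty_expire_interval. A sketch of the arithmetic involved (the helper names are illustrative; the real conversion lives in kernel/sysctl.c):

	/* Userspace writes centiseconds; the kernel variable holds jiffies.
	 * With HZ == 1000, writing 500 centisecs stores 5000 jiffies. */
	static unsigned long centisecs_to_jiffies_sketch(unsigned long cs)
	{
		return cs * HZ / USER_HZ;
	}

	static unsigned long jiffies_to_centisecs_sketch(unsigned long j)
	{
		return j * USER_HZ / HZ;
	}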
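Finally, the new throttle_vm_writeout() is meant for the VM's page reclaim path rather than for ordinary writers (mainline calls it from mm/vmscan.c); the extra dirty_thresh / 10 of headroom lets a reclaiming allocator throttle later than balance_dirty_pages() would, so heavy writers cannot starve it. A hypothetical call site (sketch only; the function name is illustrative):

	static void pageout_batch_sketch(void)
	{
		/* ...queue a batch of dirty pages for writeback... */

		/* Back off until nr_writeback + nr_unstable drops below
		 * 110% of the dirty threshold; may sleep in
		 * blk_congestion_wait(WRITE, HZ/10). */
		throttle_vm_writeout();
	}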