static long ratelimit_pages = 32;
static long total_pages; /* The total number of pages in the machine. */
-static int dirty_exceeded; /* Dirty mem may be over limit */
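+/* Written only on threshold crossings; own cacheline avoids SMP cache bouncing */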
+static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */
/*
* When balance_dirty_pages decides that the caller needs to perform some
static void background_writeout(unsigned long _min_pages);
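+/*
+ * Snapshot of just the page-state counters that writeback cares about,
+ * filled in by get_writeback_state().  Cheaper to collect than a full
+ * struct page_state.
+ */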
+struct writeback_state
+{
+ unsigned long nr_dirty;
+ unsigned long nr_unstable;
+ unsigned long nr_mapped;
+ unsigned long nr_writeback;
+};
+
+static void get_writeback_state(struct writeback_state *wbs)
+{
+ wbs->nr_dirty = read_page_state(nr_dirty);
+ wbs->nr_unstable = read_page_state(nr_unstable);
+ wbs->nr_mapped = read_page_state(nr_mapped);
+ wbs->nr_writeback = read_page_state(nr_writeback);
+}
+
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
* clamping level.
*/
static void
-get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
+get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
+ struct address_space *mapping)
{
int background_ratio; /* Percentages */
int dirty_ratio;
int unmapped_ratio;
long background;
long dirty;
+ unsigned long available_memory = total_pages;
struct task_struct *tsk;
- get_page_state(ps);
+ get_writeback_state(wbs);
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * If this mapping can only allocate from low memory,
+ * we exclude high memory from our count.
+ */
+ if (mapping && !(mapping_gfp_mask(mapping) & __GFP_HIGHMEM))
+ available_memory -= totalhigh_pages;
+#endif
- unmapped_ratio = 100 - (ps->nr_mapped * 100) / total_pages;
+
+ unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages;
dirty_ratio = vm_dirty_ratio;
if (dirty_ratio > unmapped_ratio / 2)
if (background_ratio >= dirty_ratio)
background_ratio = dirty_ratio / 2;
- background = (background_ratio * total_pages) / 100;
- dirty = (dirty_ratio * total_pages) / 100;
+ background = (background_ratio * available_memory) / 100;
+ dirty = (dirty_ratio * available_memory) / 100;
tsk = current;
if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
background += background / 4;
*/
static void balance_dirty_pages(struct address_space *mapping)
{
- struct page_state ps;
+ struct writeback_state wbs;
long nr_reclaimable;
long background_thresh;
long dirty_thresh;
.nr_to_write = write_chunk,
};
- get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
- nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
- if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
+ get_dirty_limits(&wbs, &background_thresh,
+ &dirty_thresh, mapping);
+ nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
+ if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
break;
- dirty_exceeded = 1;
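+		/* Avoid rewriting the shared flag when it is already set */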
+ if (!dirty_exceeded)
+ dirty_exceeded = 1;
/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
* Unstable writes are a feature of certain networked
*/
if (nr_reclaimable) {
writeback_inodes(&wbc);
- get_dirty_limits(&ps, &background_thresh,
- &dirty_thresh);
- nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
- if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
+ get_dirty_limits(&wbs, &background_thresh,
+ &dirty_thresh, mapping);
+ nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
+ if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
break;
pages_written += write_chunk - wbc.nr_to_write;
if (pages_written >= write_chunk)
blk_congestion_wait(WRITE, HZ/10);
}
- if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
+ if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded)
dirty_exceeded = 0;
if (writeback_in_progress(bdi))
/**
* balance_dirty_pages_ratelimited - balance dirty memory state
- * @mapping - address_space which was dirtied
+ * @mapping: address_space which was dirtied
*
* Processes which are dirtying memory should call in here once for each page
* which was newly dirtied. The function will periodically check the system's
* dirty state and will initiate writeback if needed.
*
- * On really big machines, get_page_state is expensive, so try to avoid calling
- * it too often (ratelimiting). But once we're over the dirty memory limit we
- * decrease the ratelimiting by a lot, to prevent individual processes from
- * overshooting the limit by (ratelimit_pages) each.
+ * On really big machines, get_writeback_state is expensive, so try to avoid
+ * calling it too often (ratelimiting). But once we're over the dirty memory
+ * limit we decrease the ratelimiting by a lot, to prevent individual processes
+ * from overshooting the limit by (ratelimit_pages) each.
*/
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
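+/*
+ * Throttle the caller until the number of unstable plus writeback pages
+ * drops back below the (slightly boosted) dirty threshold, sleeping on
+ * block-device congestion between checks.
+ */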
+void throttle_vm_writeout(void)
+{
+ struct writeback_state wbs;
+ long background_thresh;
+ long dirty_thresh;
+
+ for ( ; ; ) {
+ get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
+
+ /*
+ * Boost the allowable dirty threshold a bit for page
+ * allocators so they don't get DoS'ed by heavy writers
+ */
+ dirty_thresh += dirty_thresh / 10; /* wheeee... */
+
+ if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh)
+ break;
+ blk_congestion_wait(WRITE, HZ/10);
+ }
+}
+
+
/*
* writeback at least _min_pages, and keep writing until the amount of dirty
* memory is less than the background threshold, or until we're all clean.
};
for ( ; ; ) {
- struct page_state ps;
+ struct writeback_state wbs;
long background_thresh;
long dirty_thresh;
- get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
- if (ps.nr_dirty + ps.nr_unstable < background_thresh
+ get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
+ if (wbs.nr_dirty + wbs.nr_unstable < background_thresh
&& min_pages <= 0)
break;
wbc.encountered_congestion = 0;
* the whole world. Returns 0 if a pdflush thread was dispatched. Returns
* -1 if all pdflush threads were busy.
*/
-int wakeup_bdflush(long nr_pages)
+int wakeup_pdflush(long nr_pages)
{
if (nr_pages == 0) {
- struct page_state ps;
+ struct writeback_state wbs;
- get_page_state(&ps);
- nr_pages = ps.nr_dirty + ps.nr_unstable;
+ get_writeback_state(&wbs);
+ nr_pages = wbs.nr_dirty + wbs.nr_unstable;
}
return pdflush_operation(background_writeout, nr_pages);
}
static void wb_timer_fn(unsigned long unused);
static void laptop_timer_fn(unsigned long unused);
-static struct timer_list wb_timer =
- TIMER_INITIALIZER(wb_timer_fn, 0, 0);
-static struct timer_list laptop_mode_wb_timer =
- TIMER_INITIALIZER(laptop_timer_fn, 0, 0);
+static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
+static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
/*
* Periodic writeback of "old" data.
unsigned long start_jif;
unsigned long next_jif;
long nr_to_write;
- struct page_state ps;
+ struct writeback_state wbs;
struct writeback_control wbc = {
.bdi = NULL,
.sync_mode = WB_SYNC_NONE,
sync_supers();
- get_page_state(&ps);
+ get_writeback_state(&wbs);
oldest_jif = jiffies - (dirty_expire_centisecs * HZ) / 100;
start_jif = jiffies;
next_jif = start_jif + (dirty_writeback_centisecs * HZ) / 100;
- nr_to_write = ps.nr_dirty + ps.nr_unstable +
+ nr_to_write = wbs.nr_dirty + wbs.nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
while (nr_to_write > 0) {
wbc.encountered_congestion = 0;
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
int dirty_writeback_centisecs_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length)
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length);
+ proc_dointvec(table, write, file, buffer, length, ppos);
if (dirty_writeback_centisecs) {
mod_timer(&wb_timer,
jiffies + (dirty_writeback_centisecs * HZ) / 100);
/*
* If ratelimit_pages is too high then we can get into dirty-data overload
* if a large number of processes all perform writes at the same time.
- * If it is too low then SMP machines will call the (expensive) get_page_state
- * too often.
+ * If it is too low then SMP machines will call the (expensive)
+ * get_writeback_state too often.
*
* Here we set ratelimit_pages to a level which ensures that when all CPUs are
* dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
dirty_background_ratio /= 100;
vm_dirty_ratio *= correction;
vm_dirty_ratio /= 100;
+
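+		/* Never let the scaled-down ratios reach zero */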
+ if (dirty_background_ratio <= 0)
+ dirty_background_ratio = 1;
+ if (vm_dirty_ratio <= 0)
+ vm_dirty_ratio = 1;
}
mod_timer(&wb_timer, jiffies + (dirty_writeback_centisecs * HZ) / 100);
set_ratelimit();
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
+ int ret;
+
if (wbc->nr_to_write <= 0)
return 0;
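+	/* Flag the wbc so ->writepage(s) can tell this is a writepages-driven writeout */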
+ wbc->for_writepages = 1;
if (mapping->a_ops->writepages)
- return mapping->a_ops->writepages(mapping, wbc);
- return generic_writepages(mapping, wbc);
+ ret = mapping->a_ops->writepages(mapping, wbc);
+ else
+ ret = generic_writepages(mapping, wbc);
+ wbc->for_writepages = 0;
+ return ret;
}
/**
* write_one_page - write out a single page and optionally wait on I/O
*
- * @page - the page to write
- * @wait - if true, wait on writeout
+ * @page: the page to write
+ * @wait: if true, wait on writeout
*
* The page must be locked by the caller and will be unlocked upon return.
*
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping2;
if (mapping) {
- spin_lock_irq(&mapping->tree_lock);
- mapping = page_mapping(page);
- if (page_mapping(page)) { /* Race with truncate? */
- BUG_ON(page_mapping(page) != mapping);
- if (!mapping->backing_dev_info->memory_backed)
+ write_lock_irq(&mapping->tree_lock);
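+			/* Re-check the mapping now that tree_lock is held */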
+ mapping2 = page_mapping(page);
+ if (mapping2) { /* Race with truncate? */
+ BUG_ON(mapping2 != mapping);
+ if (mapping_cap_account_dirty(mapping))
inc_page_state(nr_dirty);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ write_unlock_irq(&mapping->tree_lock);
if (mapping->host) {
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host,
unsigned long flags;
if (mapping) {
- spin_lock_irqsave(&mapping->tree_lock, flags);
+ write_lock_irqsave(&mapping->tree_lock, flags);
if (TestClearPageDirty(page)) {
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
- if (!mapping->backing_dev_info->memory_backed)
+ write_unlock_irqrestore(&mapping->tree_lock, flags);
+ if (mapping_cap_account_dirty(mapping))
dec_page_state(nr_dirty);
return 1;
}
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ write_unlock_irqrestore(&mapping->tree_lock, flags);
return 0;
}
return TestClearPageDirty(page);
if (mapping) {
if (TestClearPageDirty(page)) {
- if (!mapping->backing_dev_info->memory_backed)
+ if (mapping_cap_account_dirty(mapping))
dec_page_state(nr_dirty);
return 1;
}
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
-/*
- * Clear a page's dirty flag while ignoring dirty memory accounting
- */
-int __clear_page_dirty(struct page *page)
-{
- struct address_space *mapping = page_mapping(page);
-
- if (mapping) {
- unsigned long flags;
-
- spin_lock_irqsave(&mapping->tree_lock, flags);
- if (TestClearPageDirty(page)) {
- radix_tree_tag_clear(&mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
- return 1;
- }
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
- return 0;
- }
- return TestClearPageDirty(page);
-}
-
int test_clear_page_writeback(struct page *page)
{
struct address_space *mapping = page_mapping(page);
if (mapping) {
unsigned long flags;
- spin_lock_irqsave(&mapping->tree_lock, flags);
+ write_lock_irqsave(&mapping->tree_lock, flags);
ret = TestClearPageWriteback(page);
if (ret)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_WRITEBACK);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ write_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestClearPageWriteback(page);
}
if (mapping) {
unsigned long flags;
- spin_lock_irqsave(&mapping->tree_lock, flags);
+ write_lock_irqsave(&mapping->tree_lock, flags);
ret = TestSetPageWriteback(page);
if (!ret)
radix_tree_tag_set(&mapping->page_tree,
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ write_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestSetPageWriteback(page);
}
unsigned long flags;
int ret;
- spin_lock_irqsave(&mapping->tree_lock, flags);
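+	/* A tag lookup only reads the tree, so the read side of tree_lock suffices */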
+ read_lock_irqsave(&mapping->tree_lock, flags);
ret = radix_tree_tagged(&mapping->page_tree, tag);
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ read_unlock_irqrestore(&mapping->tree_lock, flags);
return ret;
}
EXPORT_SYMBOL(mapping_tagged);