#include <linux/security.h>
#include <linux/syscalls.h>
/*
- * This is needed for the following functions:
- * - try_to_release_page
- * - block_invalidatepage
- * - generic_osync_inode
- *
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
#include <linux/buffer_head.h> /* for generic_osync_inode */
{
struct address_space *mapping = page->mapping;
- if (unlikely(!PageLocked(page)))
- PAGE_BUG(page);
+ BUG_ON(!PageLocked(page));
- spin_lock_irq(&mapping->tree_lock);
+ write_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
- spin_unlock_irq(&mapping->tree_lock);
+ write_unlock_irq(&mapping->tree_lock);
}
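/*
 * Locking note: mapping->tree_lock is a rwlock here. Paths that modify
 * the radix tree, like remove_from_page_cache() above and the
 * radix_tree_insert() caller below, must hold it for write; the pure
 * lookups further down (find_get_page() and friends) take it for read
 * and may run concurrently with one another.
 */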
static int sync_page(void *word)
page = container_of((page_flags_t *)word, struct page, flags);
/*
- * FIXME, fercrissake. What is this barrier here for?
+ * page_mapping() is being called without PG_locked held.
+ * Some knowledge of the state and use of the page is used to
+ * reduce the requirements down to a memory barrier.
+ * The danger here is of a stale page_mapping() return value
+ * indicating a struct address_space different from the one it's
+ * associated with when it is associated with one.
+ * After smp_mb(), it's either the correct page_mapping() for
+ * the page, or an old page_mapping() and the page's own
+ * page_mapping() has gone NULL.
+ * The ->sync_page() address_space operation must tolerate
+ * page_mapping() going NULL. By an amazing coincidence,
+ * this comes about because none of the users of the page
+ * in the ->sync_page() methods make essential use of the
+ * page_mapping(), merely passing the page down to the backing
+ * device's unplug functions when it's non-NULL, which in turn
+ * ignore it for all cases but swap, where only page->private is
+ * of interest. When page_mapping() does go NULL, the entire
+ * call stack gracefully ignores the page and returns.
+ * -- wli
*/
smp_mb();
mapping = page_mapping(page);
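/*
 * Illustrative sketch, not part of this patch: a ->sync_page() method
 * that tolerates page_mapping() going NULL, exactly as the comment above
 * requires. It is modeled on block_sync_page() in fs/buffer.c and needs
 * linux/blkdev.h for blk_run_backing_dev(); the name example_sync_page
 * is hypothetical.
 */
static void example_sync_page(struct page *page)
{
	struct address_space *mapping;

	smp_mb();
	mapping = page_mapping(page);
	if (mapping)
		blk_run_backing_dev(mapping->backing_dev_info, page);
}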
/**
* __filemap_fdatawrite_range - start writeback against all of a mapping's
* dirty pages that lie within the byte offsets <start, end>
- * @mapping: address space structure to write
- * @start: offset in bytes where the range starts
- * @end : offset in bytes where the range ends
+ * @mapping: address space structure to write
+ * @start: offset in bytes where the range starts
+ * @end: offset in bytes where the range ends
+ * @sync_mode: enable synchronous operation
*
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
* opposed to a regular memory cleansing writeback. The difference between
.end = end,
};
- if (mapping->backing_dev_info->memory_backed)
+ if (!mapping_cap_writeback_dirty(mapping))
return 0;
ret = do_writepages(mapping, &wbc);
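/*
 * Illustrative only: the two flavors layered on this helper, roughly as
 * the filemap_fdatawrite() and filemap_flush() wrappers elsewhere in
 * this file do it. WB_SYNC_ALL gives the "data integrity" behaviour
 * described above, WB_SYNC_NONE the opportunistic memory-cleansing one;
 * start == end == 0 is assumed here to mean "the whole file", per the
 * writeback convention of this era. The example_* names are hypothetical.
 */
int example_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite_range(mapping, 0, 0, WB_SYNC_ALL);
}

int example_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite_range(mapping, 0, 0, WB_SYNC_NONE);
}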
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
int ret;
- if (mapping->backing_dev_info->memory_backed || !count)
+ if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0) {
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
int ret;
- if (mapping->backing_dev_info->memory_backed || !count)
+ if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0)
return retval;
}
+int filemap_write_and_wait_range(struct address_space *mapping,
+ loff_t lstart, loff_t lend)
+{
+ int retval = 0;
+
+ if (mapping->nrpages) {
+ retval = __filemap_fdatawrite_range(mapping, lstart, lend,
+ WB_SYNC_ALL);
+ if (retval == 0)
+ retval = wait_on_page_writeback_range(mapping,
+ lstart >> PAGE_CACHE_SHIFT,
+ lend >> PAGE_CACHE_SHIFT);
+ }
+ return retval;
+}
+
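/*
 * Illustrative only: a caller flushing just the byte range it is about
 * to overwrite with the new helper, e.g. ahead of a ranged direct write.
 * flush_write_range() is a hypothetical wrapper, not part of this patch;
 * note the helper takes inclusive byte offsets, hence pos + count - 1.
 */
static int flush_write_range(struct address_space *mapping,
			     loff_t pos, size_t count)
{
	if (!count)
		return 0;
	return filemap_write_and_wait_range(mapping, pos, pos + count - 1);
}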
/*
* This function is used to add newly allocated pagecache pages:
* the page is new, so we can just run SetPageLocked() against it.
int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
- spin_lock_irq(&mapping->tree_lock);
+ write_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
if (!error) {
page_cache_get(page);
mapping->nrpages++;
pagecache_acct(1);
}
- spin_unlock_irq(&mapping->tree_lock);
+ write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
}
return error;
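/*
 * Illustrative only, modeled on page_cache_read() in this file: the
 * typical caller allocates a fresh page, adds it to the pagecache while
 * nobody else can see it, and only then starts I/O. example_read_page
 * is a hypothetical name; -EEXIST means somebody raced us and the page
 * is already present, which is not an error for the caller.
 */
static int example_read_page(struct file *file, struct address_space *mapping,
			     unsigned long offset)
{
	struct page *page = page_cache_alloc_cold(mapping);
	int error;

	if (!page)
		return -ENOMEM;
	error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
	if (error == 0) {
		error = mapping->a_ops->readpage(file, page);
		page_cache_release(page);
		return error;
	}
	page_cache_release(page);
	return error == -EEXIST ? 0 : error;
}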
{
struct page *page;
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page)
page_cache_get(page);
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
return page;
}
{
struct page *page;
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page && TestSetPageLocked(page))
page = NULL;
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
return page;
}
/**
* find_lock_page - locate, pin and lock a pagecache page
*
- * @mapping - the address_space to search
- * @offset - the page index
+ * @mapping: the address_space to search
+ * @offset: the page index
*
* Locates the desired pagecache page, locks it, increments its reference
* count and returns its address.
{
struct page *page;
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
repeat:
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page) {
page_cache_get(page);
if (TestSetPageLocked(page)) {
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
lock_page(page);
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
/* Has the page been truncated while we slept? */
if (page->mapping != mapping || page->index != offset) {
}
}
}
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
return page;
}
/**
* find_or_create_page - locate or add a pagecache page
*
- * @mapping - the page's address_space
- * @index - the page's index into the mapping
- * @gfp_mask - page allocation mode
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
*
* Locates a page in the pagecache. If the page is not present, a new page
* is allocated using @gfp_mask and is added to the pagecache and to the VM's
unsigned int i;
unsigned int ret;
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
ret = radix_tree_gang_lookup(&mapping->page_tree,
(void **)pages, start, nr_pages);
for (i = 0; i < ret; i++)
page_cache_get(pages[i]);
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
return ret;
}
unsigned int i;
unsigned int ret;
- spin_lock_irq(&mapping->tree_lock);
+ read_lock_irq(&mapping->tree_lock);
ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
(void **)pages, *index, nr_pages, tag);
for (i = 0; i < ret; i++)
page_cache_get(pages[i]);
if (ret)
*index = pages[ret - 1]->index + 1;
- spin_unlock_irq(&mapping->tree_lock);
+ read_unlock_irq(&mapping->tree_lock);
return ret;
}
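/*
 * Illustrative only: the usual consumer pattern for the tagged gang
 * lookup above, in the style of the writeback walkers. *index is
 * advanced past the last page returned, so the loop simply repeats
 * until the lookup comes back empty; the caller must drop the
 * references the lookup took. example_walk_dirty is a hypothetical name.
 */
static void example_walk_dirty(struct address_space *mapping)
{
	struct page *pages[16];
	pgoff_t index = 0;
	unsigned int i, nr;

	while ((nr = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
					16, pages)) != 0) {
		for (i = 0; i < nr; i++) {
			/* ... inspect or write back pages[i] here ... */
			page_cache_release(pages[i]);
		}
	}
}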
grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
{
struct page *page = find_get_page(mapping, index);
- int gfp_mask;
+ unsigned int gfp_mask;
if (page) {
if (!TestSetPageLocked(page))
unsigned long index;
unsigned long end_index;
unsigned long offset;
- unsigned long req_size;
+ unsigned long last_index;
unsigned long next_index;
unsigned long prev_index;
loff_t isize;
index = *ppos >> PAGE_CACHE_SHIFT;
next_index = index;
prev_index = ra.prev_page;
- req_size = (desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
isize = i_size_read(inode);
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
for (;;) {
struct page *page;
- unsigned long ret_size, nr, ret;
+ unsigned long nr, ret;
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
nr = nr - offset;
cond_resched();
- if (index == next_index && req_size) {
- ret_size = page_cache_readahead(mapping, &ra,
- filp, index, req_size);
- next_index += ret_size;
- req_size -= ret_size;
- }
+ if (index == next_index)
+ next_index = page_cache_readahead(mapping, &ra, filp,
+ index, last_index - index);
find_page:
page = find_get_page(mapping, index);
if (pos < size) {
retval = generic_file_direct_IO(READ, iocb,
iov, pos, nr_segs);
- if (retval >= 0 && !is_sync_kiocb(iocb))
+ if (retval > 0 && !is_sync_kiocb(iocb))
retval = -EIOCBQUEUED;
if (retval > 0)
*ppos = pos + retval;
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*/
-struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int *type)
+struct page *filemap_nopage(struct vm_area_struct *area,
+ unsigned long address, int *type)
{
int error;
struct file *file = area->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
- unsigned long size, pgoff, endoff;
+ unsigned long size, pgoff;
int did_readaround = 0, majmin = VM_FAULT_MINOR;
- pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
- endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+ pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
retry_all:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (VM_RandomReadHint(area))
goto no_cached_page;
- /*
- * The "size" of the file, as far as mmap is concerned, isn't bigger
- * than the mapping
- */
- if (size > endoff)
- size = endoff;
-
/*
* The readahead code wants to be told about each and every page
* so it can build and shrink its windows appropriately
* Ok, found a page in the page cache, now we need to check
* that it's up-to-date.
*/
- if (!PageUptodate(page))
+ if (!PageUptodate(page)) {
+ if (nonblock) {
+ page_cache_release(page);
+ return NULL;
+ }
goto page_not_uptodate;
+ }
success:
/*
buf = iov->iov_base + written;
else {
filemap_set_next_iovec(&cur_iov, &iov_base, written);
- buf = iov->iov_base + iov_base;
+ buf = cur_iov->iov_base + iov_base;
}
do {
unsigned long index;
unsigned long offset;
+ unsigned long maxlen;
size_t copied;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
* same page as we're writing to, without it being marked
* up-to-date.
*/
- fault_in_pages_readable(buf, bytes);
+ maxlen = cur_iov->iov_len - iov_base;
+ if (maxlen > bytes)
+ maxlen = bytes;
+ fault_in_pages_readable(buf, maxlen);
page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
if (!page) {
count -= status;
pos += status;
buf += status;
- if (unlikely(nr_segs > 1))
+ if (unlikely(nr_segs > 1)) {
filemap_set_next_iovec(&cur_iov,
&iov_base, status);
+ buf = cur_iov->iov_base + iov_base;
+ } else {
+ iov_base += status;
+ }
}
}
if (unlikely(copied != bytes))
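/*
 * Worked example for the iovec fixes above, with illustrative values:
 * given iov[] = { { base0, 3 }, { base1, 5 } } and status == 4,
 * filemap_set_next_iovec() leaves cur_iov == &iov[1] and iov_base == 1,
 * so the next copy source must be cur_iov->iov_base + iov_base, i.e.
 * base1 + 1. The old "iov->iov_base + iov_base" kept reading from the
 * already-consumed first segment.
 */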
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
ssize_t retval;
+ size_t write_len = 0;
/*
* If it's a write, unmap all mmappings of the file up-front. This
* will cause any pte dirty bits to be propagated into the pageframes
* for the subsequent filemap_write_and_wait().
*/
- if (rw == WRITE && mapping_mapped(mapping))
- unmap_mapping_range(mapping, 0, -1, 0);
+ if (rw == WRITE) {
+ write_len = iov_length(iov, nr_segs);
+ if (mapping_mapped(mapping))
+ unmap_mapping_range(mapping, offset, write_len, 0);
+ }
retval = filemap_write_and_wait(mapping);
if (retval == 0) {
retval = mapping->a_ops->direct_IO(rw, iocb, iov,
offset, nr_segs);
if (rw == WRITE && mapping->nrpages) {
- int err = invalidate_inode_pages2(mapping);
+ pgoff_t end = (offset + write_len - 1)
+ >> PAGE_CACHE_SHIFT;
+ int err = invalidate_inode_pages2_range(mapping,
+ offset >> PAGE_CACHE_SHIFT, end);
if (err)
retval = err;
}
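/*
 * Worked example for the ranged invalidation above, with illustrative
 * values: a 512-byte direct write at byte offset 4096 with 4K pages
 * gives offset >> PAGE_CACHE_SHIFT == 1 and
 * (offset + write_len - 1) >> PAGE_CACHE_SHIFT == 1, so only page
 * index 1 is dropped instead of every page in the mapping.
 */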