#include <linux/security.h>
#include <linux/syscalls.h>
/*
+ * This is needed for the following functions:
+ * - try_to_release_page
+ * - block_invalidatepage
+ * - generic_osync_inode
+ *
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
#include <linux/buffer_head.h> /* for generic_osync_inode */
{
struct address_space *mapping = page->mapping;
- BUG_ON(!PageLocked(page));
+ if (unlikely(!PageLocked(page)))
+ PAGE_BUG(page);
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
}
static int sync_page(void *word)
page = container_of((page_flags_t *)word, struct page, flags);
/*
- * page_mapping() is being called without PG_locked held.
- * Some knowledge of the state and use of the page is used to
- * reduce the requirements down to a memory barrier.
- * The danger here is of a stale page_mapping() return value
- * indicating a struct address_space different from the one it's
- * associated with when it is associated with one.
- * After smp_mb(), it's either the correct page_mapping() for
- * the page, or an old page_mapping() and the page's own
- * page_mapping() has gone NULL.
- * The ->sync_page() address_space operation must tolerate
- * page_mapping() going NULL. By an amazing coincidence,
- * this comes about because none of the users of the page
- * in the ->sync_page() methods make essential use of the
- * page_mapping(), merely passing the page down to the backing
- * device's unplug functions when it's non-NULL, which in turn
- * ignore it for all cases but swap, where only page->private is
- * of interest. When page_mapping() does go NULL, the entire
- * call stack gracefully ignores the page and returns.
- * -- wli
+ * FIXME, fercrissake. What is this barrier here for?
*/
smp_mb();
mapping = page_mapping(page);
/**
* filemap_fdatawrite_range - start writeback against all of a mapping's
* dirty pages that lie within the byte offsets <start, end>
- * @mapping: address space structure to write
- * @start: offset in bytes where the range starts
- * @end: offset in bytes where the range ends
- * @sync_mode: enable synchronous operation
+ * @mapping: address space structure to write
+ * @start: offset in bytes where the range starts
+ * @end : offset in bytes where the range ends
*
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
* opposed to a regular memory * cleansing writeback. The difference between
.end = end,
};
- if (!mapping_cap_writeback_dirty(mapping))
+ if (mapping->backing_dev_info->memory_backed)
return 0;
ret = do_writepages(mapping, &wbc);
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
int ret;
- if (!mapping_cap_writeback_dirty(mapping) || !count)
+ if (mapping->backing_dev_info->memory_backed || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0) {
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
int ret;
- if (!mapping_cap_writeback_dirty(mapping) || !count)
+ if (mapping->backing_dev_info->memory_backed || !count)
return 0;
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
if (ret == 0)
return retval;
}
-int filemap_write_and_wait_range(struct address_space *mapping,
- loff_t lstart, loff_t lend)
-{
- int retval = 0;
-
- if (mapping->nrpages) {
- retval = __filemap_fdatawrite_range(mapping, lstart, lend,
- WB_SYNC_ALL);
- if (retval == 0)
- retval = wait_on_page_writeback_range(mapping,
- lstart >> PAGE_CACHE_SHIFT,
- lend >> PAGE_CACHE_SHIFT);
- }
- return retval;
-}
-
/*
* This function is used to add newly allocated pagecache pages:
* the page is new, so we can just run SetPageLocked() against it.
int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
if (!error) {
page_cache_get(page);
mapping->nrpages++;
pagecache_acct(1);
}
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
}
return error;
{
struct page *page;
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page)
page_cache_get(page);
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return page;
}
{
struct page *page;
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page && TestSetPageLocked(page))
page = NULL;
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return page;
}
/**
* find_lock_page - locate, pin and lock a pagecache page
*
- * @mapping: the address_space to search
- * @offset: the page index
+ * @mapping - the address_space to search
+ * @offset - the page index
*
* Locates the desired pagecache page, locks it, increments its reference
* count and returns its address.
{
struct page *page;
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
repeat:
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page) {
page_cache_get(page);
if (TestSetPageLocked(page)) {
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
lock_page(page);
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
/* Has the page been truncated while we slept? */
if (page->mapping != mapping || page->index != offset) {
}
}
}
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return page;
}
/**
* find_or_create_page - locate or add a pagecache page
*
- * @mapping: the page's address_space
- * @index: the page's index into the mapping
- * @gfp_mask: page allocation mode
+ * @mapping - the page's address_space
+ * @index - the page's index into the mapping
+ * @gfp_mask - page allocation mode
*
* Locates a page in the pagecache. If the page is not present, a new page
* is allocated using @gfp_mask and is added to the pagecache and to the VM's
unsigned int i;
unsigned int ret;
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
ret = radix_tree_gang_lookup(&mapping->page_tree,
(void **)pages, start, nr_pages);
for (i = 0; i < ret; i++)
page_cache_get(pages[i]);
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return ret;
}
unsigned int i;
unsigned int ret;
- read_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
(void **)pages, *index, nr_pages, tag);
for (i = 0; i < ret; i++)
page_cache_get(pages[i]);
if (ret)
*index = pages[ret - 1]->index + 1;
- read_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return ret;
}
grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
{
struct page *page = find_get_page(mapping, index);
- unsigned int gfp_mask;
+ int gfp_mask;
if (page) {
if (!TestSetPageLocked(page))
int nonblock)
{
struct inode *inode = mapping->host;
- unsigned long index;
- unsigned long end_index;
- unsigned long offset;
- unsigned long last_index;
- unsigned long next_index;
- unsigned long prev_index;
+ unsigned long index, end_index, offset;
loff_t isize;
struct page *cached_page;
int error;
cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
- next_index = index;
- prev_index = ra.prev_page;
- last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
isize = i_size_read(inode);
nr = nr - offset;
cond_resched();
- if (index == next_index)
- next_index = page_cache_readahead(mapping, &ra, filp,
- index, last_index - index);
+ page_cache_readahead(mapping, &ra, filp, index);
find_page:
page = find_get_page(mapping, index);
flush_dcache_page(page);
/*
- * When (part of) the same page is read multiple times
- * in succession, only mark it as accessed the first time.
+ * Mark the page accessed if we read the beginning.
*/
- if (prev_index != index)
+ if (!offset)
mark_page_accessed(page);
- prev_index = index;
/*
* Ok, we have the page, and it's up-to-date, so
goto readpage_error;
if (!PageUptodate(page)) {
- lock_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page)) {
- if (page->mapping == NULL) {
- /*
- * invalidate_inode_pages got it
- */
- unlock_page(page);
- page_cache_release(page);
- goto find_page;
- }
- unlock_page(page);
error = -EIO;
goto readpage_error;
}
- unlock_page(page);
}
/*
if (pos < size) {
retval = generic_file_direct_IO(READ, iocb,
iov, pos, nr_segs);
- if (retval > 0 && !is_sync_kiocb(iocb))
+ if (retval >= 0 && !is_sync_kiocb(iocb))
retval = -EIOCBQUEUED;
if (retval > 0)
*ppos = pos + retval;
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*/
-struct page *filemap_nopage(struct vm_area_struct *area,
- unsigned long address, int *type)
+struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int *type)
{
int error;
struct file *file = area->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
- unsigned long size, pgoff;
+ unsigned long size, pgoff, endoff;
int did_readaround = 0, majmin = VM_FAULT_MINOR;
- pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+ pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+ endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
retry_all:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (VM_RandomReadHint(area))
goto no_cached_page;
+ /*
+ * The "size" of the file, as far as mmap is concerned, isn't bigger
+ * than the mapping
+ */
+ if (size > endoff)
+ size = endoff;
+
/*
* The readahead code wants to be told about each and every page
* so it can build and shrink its windows appropriately
* For sequential accesses, we use the generic readahead logic.
*/
if (VM_SequentialReadHint(area))
- page_cache_readahead(mapping, ra, file, pgoff, 1);
+ page_cache_readahead(mapping, ra, file, pgoff);
/*
* Do we have something in the page cache already?
* Ok, found a page in the page cache, now we need to check
* that it's up-to-date.
*/
- if (!PageUptodate(page)) {
- if (nonblock) {
- page_cache_release(page);
- return NULL;
- }
+ if (!PageUptodate(page))
goto page_not_uptodate;
- }
success:
/*
return NULL;
}
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
- unsigned long len, pgprot_t prot, unsigned long pgoff,
- int nonblock)
+static int filemap_populate(struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long len,
+ pgprot_t prot,
+ unsigned long pgoff,
+ int nonblock)
{
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
vma->vm_ops = &generic_file_vm_ops;
return 0;
}
-EXPORT_SYMBOL(filemap_populate);
/*
* This is for filesystems which do not implement ->writepage.
pagevec_init(&lru_pvec, 0);
- /*
- * handle partial DIO write. Adjust cur_iov if needed.
- */
- if (likely(nr_segs == 1))
- buf = iov->iov_base + written;
- else {
- filemap_set_next_iovec(&cur_iov, &iov_base, written);
- buf = cur_iov->iov_base + iov_base;
- }
-
+ buf = iov->iov_base + written; /* handle partial DIO write */
do {
unsigned long index;
unsigned long offset;
- unsigned long maxlen;
size_t copied;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
* same page as we're writing to, without it being marked
* up-to-date.
*/
- maxlen = cur_iov->iov_len - iov_base;
- if (maxlen > bytes)
- maxlen = bytes;
- fault_in_pages_readable(buf, maxlen);
+ fault_in_pages_readable(buf, bytes);
page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
if (!page) {
count -= status;
pos += status;
buf += status;
- if (unlikely(nr_segs > 1)) {
+ if (unlikely(nr_segs > 1))
filemap_set_next_iovec(&cur_iov,
&iov_base, status);
- buf = cur_iov->iov_base + iov_base;
- } else {
- iov_base += status;
- }
}
}
if (unlikely(copied != bytes))
count = ocount;
pos = *ppos;
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
written = 0;
BUG_ON(iocb->ki_pos != pos);
down(&inode->i_sem);
- ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
+ ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
&iocb->ki_pos);
up(&inode->i_sem);
EXPORT_SYMBOL(generic_file_writev);
/*
- * Called under i_sem for writes to S_ISREG files. Returns -EIO if something
- * went wrong during pagecache shootdown.
+ * Called under i_sem for writes to S_ISREG files
*/
ssize_t
generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
ssize_t retval;
- size_t write_len = 0;
-
- /*
- * If it's a write, unmap all mmappings of the file up-front. This
- * will cause any pte dirty bits to be propagated into the pageframes
- * for the subsequent filemap_write_and_wait().
- */
- if (rw == WRITE) {
- write_len = iov_length(iov, nr_segs);
- if (mapping_mapped(mapping))
- unmap_mapping_range(mapping, offset, write_len, 0);
- }
retval = filemap_write_and_wait(mapping);
if (retval == 0) {
retval = mapping->a_ops->direct_IO(rw, iocb, iov,
offset, nr_segs);
- if (rw == WRITE && mapping->nrpages) {
- pgoff_t end = (offset + write_len - 1)
- >> PAGE_CACHE_SHIFT;
- int err = invalidate_inode_pages2_range(mapping,
- offset >> PAGE_CACHE_SHIFT, end);
- if (err)
- retval = err;
- }
+ if (rw == WRITE && mapping->nrpages)
+ invalidate_inode_pages2(mapping);
}
return retval;
}