This commit was manufactured by cvs2svn to create tag
diff --git a/mm/filemap.c b/mm/filemap.c
index e72944c..7c4dbca 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
 #include <linux/security.h>
 #include <linux/syscalls.h>
 /*
+ * This is needed for the following functions:
+ *  - try_to_release_page
+ *  - block_invalidatepage
+ *  - generic_osync_inode
+ *
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
@@ -118,11 +123,12 @@ void remove_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
 
-       BUG_ON(!PageLocked(page));
+       if (unlikely(!PageLocked(page)))
+               PAGE_BUG(page);
 
-       write_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        __remove_from_page_cache(page);
-       write_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
 }
 
 static int sync_page(void *word)
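
This hunk trades the rwlock flavor of mapping->tree_lock (write_lock_irq) for a plain spinlock: with the rwlock, lookups such as find_get_page() can run concurrently, while the spinlock serializes readers and writers alike. Either way the _irq variants are required, because the lock is also taken from interrupt context when I/O completion updates radix-tree tags. A sketch of that interrupt-side pairing, with a hypothetical helper name:

    /* Hypothetical I/O-completion helper: irqsave is needed because it
     * may be entered with interrupts already disabled. Kernel-tree C,
     * not compilable stand-alone. */
    static void my_end_io(struct address_space *mapping, struct page *page)
    {
            unsigned long flags;

            spin_lock_irqsave(&mapping->tree_lock, flags);
            radix_tree_tag_clear(&mapping->page_tree, page->index,
                                 PAGECACHE_TAG_WRITEBACK);
            spin_unlock_irqrestore(&mapping->tree_lock, flags);
    }
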
@@ -133,25 +139,7 @@ static int sync_page(void *word)
        page = container_of((page_flags_t *)word, struct page, flags);
 
        /*
-        * page_mapping() is being called without PG_locked held.
-        * Some knowledge of the state and use of the page is used to
-        * reduce the requirements down to a memory barrier.
-        * The danger here is of a stale page_mapping() return value
-        * indicating a struct address_space different from the one it's
-        * associated with when it is associated with one.
-        * After smp_mb(), it's either the correct page_mapping() for
-        * the page, or an old page_mapping() and the page's own
-        * page_mapping() has gone NULL.
-        * The ->sync_page() address_space operation must tolerate
-        * page_mapping() going NULL. By an amazing coincidence,
-        * this comes about because none of the users of the page
-        * in the ->sync_page() methods make essential use of the
-        * page_mapping(), merely passing the page down to the backing
-        * device's unplug functions when it's non-NULL, which in turn
-        * ignore it for all cases but swap, where only page->private is
-        * of interest. When page_mapping() does go NULL, the entire
-        * call stack gracefully ignores the page and returns.
-        * -- wli
+        * FIXME, fercrissake.  What is this barrier here for?
         */
        smp_mb();
        mapping = page_mapping(page);
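
The deleted comment was wli's proof sketch for this barrier: after smp_mb(), page_mapping() returns either the address_space the page is stably attached to, or NULL, and every ->sync_page() implementation must tolerate the NULL. A minimal sketch of such a method, modeled loosely on block_sync_page() (the name my_sync_page is hypothetical):

    /* Kernel-tree sketch: tolerate page_mapping() going NULL under us. */
    static void my_sync_page(struct page *page)
    {
            struct address_space *mapping;

            smp_mb();
            mapping = page_mapping(page);
            if (mapping)    /* NULL: page was detached, do nothing */
                    blk_run_backing_dev(mapping->backing_dev_info, page);
    }
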
@@ -164,10 +152,9 @@ static int sync_page(void *word)
 /**
  * filemap_fdatawrite_range - start writeback against all of a mapping's
  * dirty pages that lie within the byte offsets <start, end>
- * @mapping:   address space structure to write
- * @start:     offset in bytes where the range starts
- * @end:       offset in bytes where the range ends
- * @sync_mode: enable synchronous operation
+ * @mapping: address space structure to write
+ * @start: offset in bytes where the range starts
+ * @end : offset in bytes where the range ends
  *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
  * opposed to a regular memory cleansing writeback.  The difference between
@@ -185,7 +172,7 @@ static int __filemap_fdatawrite_range(struct address_space *mapping,
                .end = end,
        };
 
-       if (!mapping_cap_writeback_dirty(mapping))
+       if (mapping->backing_dev_info->memory_backed)
                return 0;
 
        ret = do_writepages(mapping, &wbc);
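
Both spellings test the same condition: a memory-backed device (ramdisk-style, no real backing store) has nothing to write back, so the function can return success immediately. The `-` side hides the field behind a helper; a plausible shape for it, offered as an assumption rather than the definition actually in the tree:

    /* Assumed definition: a thin predicate over the same bdi field. */
    static inline int mapping_cap_writeback_dirty(struct address_space *mapping)
    {
            return !mapping->backing_dev_info->memory_backed;
    }
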
@@ -282,7 +269,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        int ret;
 
-       if (!mapping_cap_writeback_dirty(mapping) || !count)
+       if (mapping->backing_dev_info->memory_backed || !count)
                return 0;
        ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
        if (ret == 0) {
@@ -308,7 +295,7 @@ int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        int ret;
 
-       if (!mapping_cap_writeback_dirty(mapping) || !count)
+       if (mapping->backing_dev_info->memory_backed || !count)
                return 0;
        ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
        if (ret == 0)
@@ -349,22 +336,6 @@ int filemap_write_and_wait(struct address_space *mapping)
        return retval;
 }
 
-int filemap_write_and_wait_range(struct address_space *mapping,
-                                loff_t lstart, loff_t lend)
-{
-       int retval = 0;
-
-       if (mapping->nrpages) {
-               retval = __filemap_fdatawrite_range(mapping, lstart, lend,
-                                                   WB_SYNC_ALL);
-               if (retval == 0)
-                       retval = wait_on_page_writeback_range(mapping,
-                                                   lstart >> PAGE_CACHE_SHIFT,
-                                                   lend >> PAGE_CACHE_SHIFT);
-       }
-       return retval;
-}
-
 /*
  * This function is used to add newly allocated pagecache pages:
  * the page is new, so we can just run SetPageLocked() against it.
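
The removed filemap_write_and_wait_range() is the ranged sibling of filemap_write_and_wait() above: start WB_SYNC_ALL writeback for the bytes [lstart, lend], then wait on just those pages. A hypothetical caller, to show the intended use on the side of the diff that still has it:

    /* Hypothetical wrapper: flush and wait on exactly the bytes an
     * O_SYNC write touched, rather than the whole file. */
    static int sync_written_span(struct address_space *mapping,
                                 loff_t pos, size_t count)
    {
            return filemap_write_and_wait_range(mapping, pos,
                                                pos + count - 1);
    }
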
@@ -378,7 +349,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
        int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
        if (error == 0) {
-               write_lock_irq(&mapping->tree_lock);
+               spin_lock_irq(&mapping->tree_lock);
                error = radix_tree_insert(&mapping->page_tree, offset, page);
                if (!error) {
                        page_cache_get(page);
@@ -388,7 +359,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
                        mapping->nrpages++;
                        pagecache_acct(1);
                }
-               write_unlock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
                radix_tree_preload_end();
        }
        return error;
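
add_to_page_cache() is the canonical radix_tree_preload() idiom: node allocations happen up front, with preemption disabled from then on, so the radix_tree_insert() executed under the IRQ-disabled tree_lock can never need memory. The same shape reduced to a skeleton (insert_item and the lock comments are illustrative):

    /* Skeleton of the preload idiom, kernel-tree C. */
    static int insert_item(struct radix_tree_root *root, unsigned long index,
                           void *item, unsigned int gfp_mask)
    {
            int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);

            if (error)
                    return error;           /* -ENOMEM, no locks held */
            /* take the spinlock protecting *root */
            error = radix_tree_insert(root, index, item);
            /* drop the spinlock */
            radix_tree_preload_end();       /* re-enables preemption */
            return error;
    }
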
@@ -501,11 +472,11 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page)
                page_cache_get(page);
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
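
find_get_page() takes its page reference while still holding tree_lock, which is what makes the returned pointer safe to use after the lock drops; the caller owns that reference and must release it. A minimal caller, sketched with a hypothetical helper name:

    /* Does the cache currently hold a page at this index? */
    static int page_is_cached(struct address_space *mapping, pgoff_t index)
    {
            struct page *page = find_get_page(mapping, index);

            if (!page)
                    return 0;
            page_cache_release(page);       /* drop the lookup reference */
            return 1;
    }
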
@@ -518,11 +489,11 @@ struct page *find_trylock_page(struct address_space *mapping, unsigned long offs
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page && TestSetPageLocked(page))
                page = NULL;
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
@@ -531,8 +502,8 @@ EXPORT_SYMBOL(find_trylock_page);
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  *
- * @mapping: the address_space to search
- * @offset: the page index
+ * @mapping - the address_space to search
+ * @offset - the page index
  *
  * Locates the desired pagecache page, locks it, increments its reference
  * count and returns its address.
@@ -544,15 +515,15 @@ struct page *find_lock_page(struct address_space *mapping,
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
 repeat:
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page) {
                page_cache_get(page);
                if (TestSetPageLocked(page)) {
-                       read_unlock_irq(&mapping->tree_lock);
+                       spin_unlock_irq(&mapping->tree_lock);
                        lock_page(page);
-                       read_lock_irq(&mapping->tree_lock);
+                       spin_lock_irq(&mapping->tree_lock);
 
                        /* Has the page been truncated while we slept? */
                        if (page->mapping != mapping || page->index != offset) {
@@ -562,7 +533,7 @@ repeat:
                        }
                }
        }
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
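
The subtle part of find_lock_page() survives the lock conversion unchanged: tree_lock must be dropped before sleeping in lock_page(), so the page can be truncated in that window, and mapping/index are re-verified once the lock is retaken. Any caller that sleeps on a looked-up page needs the same discipline; a hedged sketch (lookup_and_lock is hypothetical):

    /* Look up a page and lock it, re-validating after the sleep. */
    static struct page *lookup_and_lock(struct address_space *mapping,
                                        pgoff_t index)
    {
            struct page *page = find_get_page(mapping, index);

            if (!page)
                    return NULL;
            lock_page(page);                        /* may sleep */
            if (page->mapping != mapping || page->index != index) {
                    unlock_page(page);              /* truncated meanwhile */
                    page_cache_release(page);
                    return NULL;
            }
            return page;                            /* locked + referenced */
    }
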
@@ -571,9 +542,9 @@ EXPORT_SYMBOL(find_lock_page);
 /**
  * find_or_create_page - locate or add a pagecache page
  *
- * @mapping: the page's address_space
- * @index: the page's index into the mapping
- * @gfp_mask: page allocation mode
+ * @mapping - the page's address_space
+ * @index - the page's index into the mapping
+ * @gfp_mask - page allocation mode
  *
  * Locates a page in the pagecache.  If the page is not present, a new page
  * is allocated using @gfp_mask and is added to the pagecache and to the VM's
@@ -636,12 +607,12 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
        unsigned int i;
        unsigned int ret;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        ret = radix_tree_gang_lookup(&mapping->page_tree,
                                (void **)pages, start, nr_pages);
        for (i = 0; i < ret; i++)
                page_cache_get(pages[i]);
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return ret;
 }
 
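
find_get_pages() looks up to nr_pages pages in one tree_lock acquisition and returns each with a reference held. Typical batched-walk usage, sketched (walk_mapping is a hypothetical name):

    /* Visit every cached page of a mapping in batches of 16. */
    static void walk_mapping(struct address_space *mapping)
    {
            struct page *pages[16];
            pgoff_t next = 0;
            unsigned int i, nr;

            while ((nr = find_get_pages(mapping, next, 16, pages)) != 0) {
                    for (i = 0; i < nr; i++) {
                            next = pages[i]->index + 1;
                            /* ... per-page work goes here ... */
                            page_cache_release(pages[i]);
                    }
            }
    }
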
@@ -655,14 +626,14 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
        unsigned int i;
        unsigned int ret;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
                                (void **)pages, *index, nr_pages, tag);
        for (i = 0; i < ret; i++)
                page_cache_get(pages[i]);
        if (ret)
                *index = pages[ret - 1]->index + 1;
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return ret;
 }
 
@@ -679,7 +650,7 @@ struct page *
 grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 {
        struct page *page = find_get_page(mapping, index);
-       unsigned int gfp_mask;
+       int gfp_mask;
 
        if (page) {
                if (!TestSetPageLocked(page))
@@ -718,12 +689,7 @@ void do_generic_mapping_read(struct address_space *mapping,
                             int nonblock)
 {
        struct inode *inode = mapping->host;
-       unsigned long index;
-       unsigned long end_index;
-       unsigned long offset;
-       unsigned long last_index;
-       unsigned long next_index;
-       unsigned long prev_index;
+       unsigned long index, end_index, offset;
        loff_t isize;
        struct page *cached_page;
        int error;
@@ -731,9 +697,6 @@ void do_generic_mapping_read(struct address_space *mapping,
 
        cached_page = NULL;
        index = *ppos >> PAGE_CACHE_SHIFT;
-       next_index = index;
-       prev_index = ra.prev_page;
-       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        isize = i_size_read(inode);
@@ -758,9 +721,7 @@ void do_generic_mapping_read(struct address_space *mapping,
                nr = nr - offset;
 
                cond_resched();
-               if (index == next_index)
-                       next_index = page_cache_readahead(mapping, &ra, filp,
-                                       index, last_index - index);
+               page_cache_readahead(mapping, &ra, filp, index);
 
 find_page:
                page = find_get_page(mapping, index);
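
The `-` side calls page_cache_readahead() only when index reaches the trigger point it returned previously (next_index), and passes how many pages the caller still wants (last_index - index); the `+` side pings it on every page with no size hint. A worked example of the `-` side's arithmetic, as a stand-alone program (PAGE_CACHE_SHIFT of 12, i.e. 4 KB pages, is an assumption for the example):

    #include <stdio.h>

    int main(void)
    {
            unsigned long ppos = 8192, count = 10000;
            unsigned long index = ppos >> 12;
            unsigned long last_index = (ppos + count + 4095) >> 12;

            /* prints: readahead: 3 pages wanted starting at index 2 */
            printf("readahead: %lu pages wanted starting at index %lu\n",
                   last_index - index, index);
            return 0;
    }
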
@@ -790,12 +751,10 @@ page_ok:
                        flush_dcache_page(page);
 
                /*
-                * When (part of) the same page is read multiple times
-                * in succession, only mark it as accessed the first time.
+                * Mark the page accessed if we read the beginning.
                 */
-               if (prev_index != index)
+               if (!offset)
                        mark_page_accessed(page);
-               prev_index = index;
 
                /*
                 * Ok, we have the page, and it's up-to-date, so
@@ -842,21 +801,11 @@ readpage:
                        goto readpage_error;
 
                if (!PageUptodate(page)) {
-                       lock_page(page);
+                       wait_on_page_locked(page);
                        if (!PageUptodate(page)) {
-                               if (page->mapping == NULL) {
-                                       /*
-                                        * invalidate_inode_pages got it
-                                        */
-                                       unlock_page(page);
-                                       page_cache_release(page);
-                                       goto find_page;
-                               }
-                               unlock_page(page);
                                error = -EIO;
                                goto readpage_error;
                        }
-                       unlock_page(page);
                }
 
                /*
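
The removed lock_page() dance closes a race: when a not-uptodate page is found, the newer code takes the page lock and re-checks, and a NULL page->mapping means invalidate_inode_pages() got there first, so the read retries instead of failing. Plain wait_on_page_locked() cannot tell "invalidated" apart from "I/O error". That recheck, factored into a sketch (wait_until_uptodate is hypothetical):

    /* Classify a page that was observed not uptodate. */
    static int wait_until_uptodate(struct address_space *mapping,
                                   struct page *page)
    {
            lock_page(page);
            if (PageUptodate(page)) {
                    unlock_page(page);
                    return 0;               /* I/O finished fine */
            }
            if (page->mapping == NULL) {
                    unlock_page(page);
                    return -EAGAIN;         /* invalidated: re-lookup */
            }
            unlock_page(page);
            return -EIO;                    /* genuine read failure */
    }
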
@@ -1015,7 +964,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                if (pos < size) {
                        retval = generic_file_direct_IO(READ, iocb,
                                                iov, pos, nr_segs);
-                       if (retval > 0 && !is_sync_kiocb(iocb))
+                       if (retval >= 0 && !is_sync_kiocb(iocb))
                                retval = -EIOCBQUEUED;
                        if (retval > 0)
                                *ppos = pos + retval;
@@ -1191,8 +1140,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_nopage(struct vm_area_struct *area,
-                               unsigned long address, int *type)
+struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int *type)
 {
        int error;
        struct file *file = area->vm_file;
@@ -1200,10 +1148,11 @@ struct page *filemap_nopage(struct vm_area_struct *area,
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        struct page *page;
-       unsigned long size, pgoff;
+       unsigned long size, pgoff, endoff;
        int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
-       pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+       pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+       endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -1214,6 +1163,13 @@ retry_all:
        if (VM_RandomReadHint(area))
                goto no_cached_page;
 
+       /*
+        * The "size" of the file, as far as mmap is concerned, isn't bigger
+        * than the mapping
+        */
+       if (size > endoff)
+               size = endoff;
+
        /*
         * The readahead code wants to be told about each and every page
         * so it can build and shrink its windows appropriately
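
The added clamp caps the apparent file size at the end of the VMA, so a fault can never instantiate cache pages beyond what this particular mapping can address. The arithmetic, modeled as a stand-alone program (4 KB pages assumed; the sizes are illustrative):

    #include <stdio.h>

    /* A 16 KB file (4 pages) mapped through a 2-page window whose
     * vm_pgoff is 1: only file pages 1 and 2 are reachable. */
    int main(void)
    {
            unsigned long vm_pgoff = 1;
            unsigned long vma_pages = 2;    /* (vm_end - vm_start) >> 12 */
            unsigned long size = (16384 + 4095) >> 12;      /* 4 */
            unsigned long endoff = vma_pages + vm_pgoff;    /* 3 */

            if (size > endoff)
                    size = endoff;
            /* prints: faults allowed for pgoff < 3 */
            printf("faults allowed for pgoff < %lu\n", size);
            return 0;
    }
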
@@ -1221,7 +1177,7 @@ retry_all:
         * For sequential accesses, we use the generic readahead logic.
         */
        if (VM_SequentialReadHint(area))
-               page_cache_readahead(mapping, ra, file, pgoff, 1);
+               page_cache_readahead(mapping, ra, file, pgoff);
 
        /*
         * Do we have something in the page cache already?
@@ -1403,13 +1359,8 @@ retry_find:
         * Ok, found a page in the page cache, now we need to check
         * that it's up-to-date.
         */
-       if (!PageUptodate(page)) {
-               if (nonblock) {
-                       page_cache_release(page);
-                       return NULL;
-               }
+       if (!PageUptodate(page))
                goto page_not_uptodate;
-       }
 
 success:
        /*
@@ -1493,9 +1444,12 @@ err:
        return NULL;
 }
 
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-               unsigned long len, pgprot_t prot, unsigned long pgoff,
-               int nonblock)
+static int filemap_populate(struct vm_area_struct *vma,
+                       unsigned long addr,
+                       unsigned long len,
+                       pgprot_t prot,
+                       unsigned long pgoff,
+                       int nonblock)
 {
        struct file *file = vma->vm_file;
        struct address_space *mapping = file->f_mapping;
@@ -1555,7 +1509,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
        vma->vm_ops = &generic_file_vm_ops;
        return 0;
 }
-EXPORT_SYMBOL(filemap_populate);
 
 /*
  * This is for filesystems which do not implement ->writepage.
@@ -1966,20 +1919,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 
        pagevec_init(&lru_pvec, 0);
 
-       /*
-        * handle partial DIO write.  Adjust cur_iov if needed.
-        */
-       if (likely(nr_segs == 1))
-               buf = iov->iov_base + written;
-       else {
-               filemap_set_next_iovec(&cur_iov, &iov_base, written);
-               buf = cur_iov->iov_base + iov_base;
-       }
-
+       buf = iov->iov_base + written;  /* handle partial DIO write */
        do {
                unsigned long index;
                unsigned long offset;
-               unsigned long maxlen;
                size_t copied;
 
                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
@@ -1994,10 +1937,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                 * same page as we're writing to, without it being marked
                 * up-to-date.
                 */
-               maxlen = cur_iov->iov_len - iov_base;
-               if (maxlen > bytes)
-                       maxlen = bytes;
-               fault_in_pages_readable(buf, maxlen);
+               fault_in_pages_readable(buf, bytes);
 
                page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
                if (!page) {
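
The `-` lines clamp the prefault length to what is left of the current iovec segment: with nr_segs > 1, buf + bytes may run past the end of cur_iov, so touching a full `bytes` (as the `+` side does) is only safe in the single-segment case. The clamp as an isolated helper, sketched with the surrounding function's variable names:

    /* Prefault at most the remainder of the current segment. */
    static void prefault_current_segment(const struct iovec *cur_iov,
                                         size_t iov_base, size_t bytes,
                                         const char __user *buf)
    {
            unsigned long maxlen = cur_iov->iov_len - iov_base;

            if (maxlen > bytes)
                    maxlen = bytes;
            fault_in_pages_readable(buf, maxlen);
    }
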
@@ -2035,13 +1975,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                                count -= status;
                                pos += status;
                                buf += status;
-                               if (unlikely(nr_segs > 1)) {
+                               if (unlikely(nr_segs > 1))
                                        filemap_set_next_iovec(&cur_iov,
                                                        &iov_base, status);
-                                       buf = cur_iov->iov_base + iov_base;
-                               } else {
-                                       iov_base += status;
-                               }
                        }
                }
                if (unlikely(copied != bytes))
@@ -2121,8 +2057,6 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
        count = ocount;
        pos = *ppos;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
        written = 0;
@@ -2226,7 +2160,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
        BUG_ON(iocb->ki_pos != pos);
 
        down(&inode->i_sem);
-       ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
+       ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
                                                &iocb->ki_pos);
        up(&inode->i_sem);
 
@@ -2302,8 +2236,7 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
 EXPORT_SYMBOL(generic_file_writev);
 
 /*
- * Called under i_sem for writes to S_ISREG files.   Returns -EIO if something
- * went wrong during pagecache shootdown.
+ * Called under i_sem for writes to S_ISREG files
  */
 ssize_t
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
@@ -2312,31 +2245,13 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        ssize_t retval;
-       size_t write_len = 0;
-
-       /*
-        * If it's a write, unmap all mmappings of the file up-front.  This
-        * will cause any pte dirty bits to be propagated into the pageframes
-        * for the subsequent filemap_write_and_wait().
-        */
-       if (rw == WRITE) {
-               write_len = iov_length(iov, nr_segs);
-               if (mapping_mapped(mapping))
-                       unmap_mapping_range(mapping, offset, write_len, 0);
-       }
 
        retval = filemap_write_and_wait(mapping);
        if (retval == 0) {
                retval = mapping->a_ops->direct_IO(rw, iocb, iov,
                                                offset, nr_segs);
-               if (rw == WRITE && mapping->nrpages) {
-                       pgoff_t end = (offset + write_len - 1)
-                                               >> PAGE_CACHE_SHIFT;
-                       int err = invalidate_inode_pages2_range(mapping,
-                                       offset >> PAGE_CACHE_SHIFT, end);
-                       if (err)
-                               retval = err;
-               }
+               if (rw == WRITE && mapping->nrpages)
+                       invalidate_inode_pages2(mapping);
        }
        return retval;
 }
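
The removed logic is the careful form of direct-IO write coherency: unmap the file's user mappings up front so pte dirty bits reach the page frames before filemap_write_and_wait(), then, after the write, invalidate only the page range the write overlapped and propagate any failure. The surviving `+` line instead drops every cached page of the mapping and ignores the result. The range computation, modeled as a stand-alone program (4 KB pages assumed):

    #include <stdio.h>

    /* Which page indices does a direct write of write_len bytes at
     * byte position offset overlap? */
    int main(void)
    {
            unsigned long long offset = 6000, write_len = 10000;
            unsigned long start = offset >> 12;                     /* 1 */
            unsigned long end = (offset + write_len - 1) >> 12;     /* 3 */

            /* prints: invalidate pages 1..3 inclusive */
            printf("invalidate pages %lu..%lu inclusive\n", start, end);
            return 0;
    }
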