This commit was manufactured by cvs2svn to create tag
diff --git a/mm/filemap.c b/mm/filemap.c
index e72944c..7c4dbca 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
 #include <linux/security.h>
 #include <linux/syscalls.h>
 /*
+ * This is needed for the following functions:
+ *  - try_to_release_page
+ *  - block_invalidatepage
+ *  - generic_osync_inode
+ *
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
@@ -118,11 +123,12 @@ void remove_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
 
-       BUG_ON(!PageLocked(page));
+       if (unlikely(!PageLocked(page)))
+               PAGE_BUG(page);
 
-       write_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        __remove_from_page_cache(page);
-       write_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
 }
 
 static int sync_page(void *word)
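
This hunk trades the rwlock flavor of mapping->tree_lock (write_lock_irq) for a plain spinlock: with the rwlock, lookups such as find_get_page() can run concurrently, while the spinlock serializes readers and writers alike. Either way the _irq variants are required, because the lock is also taken from interrupt context when I/O completion updates radix-tree tags. A sketch of that interrupt-side pairing, with a hypothetical helper name:

    /* Hypothetical I/O-completion helper: irqsave is needed because it
     * may be entered with interrupts already disabled. Kernel-tree C,
     * not compilable stand-alone. */
    static void my_end_io(struct address_space *mapping, struct page *page)
    {
            unsigned long flags;

            spin_lock_irqsave(&mapping->tree_lock, flags);
            radix_tree_tag_clear(&mapping->page_tree, page->index,
                                 PAGECACHE_TAG_WRITEBACK);
            spin_unlock_irqrestore(&mapping->tree_lock, flags);
    }
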
@@ -133,25 +139,7 @@ static int sync_page(void *word)
        page = container_of((page_flags_t *)word, struct page, flags);
 
        /*
-        * page_mapping() is being called without PG_locked held.
-        * Some knowledge of the state and use of the page is used to
-        * reduce the requirements down to a memory barrier.
-        * The danger here is of a stale page_mapping() return value
-        * indicating a struct address_space different from the one it's
-        * associated with when it is associated with one.
-        * After smp_mb(), it's either the correct page_mapping() for
-        * the page, or an old page_mapping() and the page's own
-        * page_mapping() has gone NULL.
-        * The ->sync_page() address_space operation must tolerate
-        * page_mapping() going NULL. By an amazing coincidence,
-        * this comes about because none of the users of the page
-        * in the ->sync_page() methods make essential use of the
-        * page_mapping(), merely passing the page down to the backing
-        * device's unplug functions when it's non-NULL, which in turn
-        * ignore it for all cases but swap, where only page->private is
-        * of interest. When page_mapping() does go NULL, the entire
-        * call stack gracefully ignores the page and returns.
-        * -- wli
+        * FIXME, fercrissake.  What is this barrier here for?
         */
        smp_mb();
        mapping = page_mapping(page);
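
The deleted comment was wli's proof sketch for this barrier: after smp_mb(), page_mapping() returns either the address_space the page is stably attached to, or NULL, and every ->sync_page() implementation must tolerate the NULL. A minimal sketch of such a method, modeled loosely on block_sync_page() (the name my_sync_page is hypothetical):

    /* Kernel-tree sketch: tolerate page_mapping() going NULL under us. */
    static void my_sync_page(struct page *page)
    {
            struct address_space *mapping;

            smp_mb();
            mapping = page_mapping(page);
            if (mapping)    /* NULL: page was detached, do nothing */
                    blk_run_backing_dev(mapping->backing_dev_info, page);
    }
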
@@ -164,10 +152,9 @@ static int sync_page(void *word)
 /**
  * filemap_fdatawrite_range - start writeback against all of a mapping's
  * dirty pages that lie within the byte offsets <start, end>
- * @mapping:   address space structure to write
- * @start:     offset in bytes where the range starts
- * @end:       offset in bytes where the range ends
- * @sync_mode: enable synchronous operation
+ * @mapping: address space structure to write
+ * @start: offset in bytes where the range starts
+ * @end : offset in bytes where the range ends
  *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
  * opposed to a regular memory cleansing writeback.  The difference between
@@ -185,7 +172,7 @@ static int __filemap_fdatawrite_range(struct address_space *mapping,
                .end = end,
        };
 
-       if (!mapping_cap_writeback_dirty(mapping))
+       if (mapping->backing_dev_info->memory_backed)
                return 0;
 
        ret = do_writepages(mapping, &wbc);
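
Both spellings test the same condition: a memory-backed device (ramdisk-style, no real backing store) has nothing to write back, so the function can return success immediately. The `-` side hides the field behind a helper; a plausible shape for it, offered as an assumption rather than the definition actually in the tree:

    /* Assumed definition: a thin predicate over the same bdi field. */
    static inline int mapping_cap_writeback_dirty(struct address_space *mapping)
    {
            return !mapping->backing_dev_info->memory_backed;
    }
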
@@ -282,7 +269,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        int ret;
 
-       if (!mapping_cap_writeback_dirty(mapping) || !count)
+       if (mapping->backing_dev_info->memory_backed || !count)
                return 0;
        ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
        if (ret == 0) {
@@ -308,7 +295,7 @@ int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        int ret;
 
-       if (!mapping_cap_writeback_dirty(mapping) || !count)
+       if (mapping->backing_dev_info->memory_backed || !count)
                return 0;
        ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
        if (ret == 0)
@@ -349,22 +336,6 @@ int filemap_write_and_wait(struct address_space *mapping)
        return retval;
 }
 
-int filemap_write_and_wait_range(struct address_space *mapping,
-                                loff_t lstart, loff_t lend)
-{
-       int retval = 0;
-
-       if (mapping->nrpages) {
-               retval = __filemap_fdatawrite_range(mapping, lstart, lend,
-                                                   WB_SYNC_ALL);
-               if (retval == 0)
-                       retval = wait_on_page_writeback_range(mapping,
-                                                   lstart >> PAGE_CACHE_SHIFT,
-                                                   lend >> PAGE_CACHE_SHIFT);
-       }
-       return retval;
-}
-
 /*
  * This function is used to add newly allocated pagecache pages:
  * the page is new, so we can just run SetPageLocked() against it.
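
The removed filemap_write_and_wait_range() is the ranged sibling of filemap_write_and_wait() above: start WB_SYNC_ALL writeback for the bytes [lstart, lend], then wait on just those pages. A hypothetical caller, to show the intended use on the side of the diff that still has it:

    /* Hypothetical wrapper: flush and wait on exactly the bytes an
     * O_SYNC write touched, rather than the whole file. */
    static int sync_written_span(struct address_space *mapping,
                                 loff_t pos, size_t count)
    {
            return filemap_write_and_wait_range(mapping, pos,
                                                pos + count - 1);
    }
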
@@ -378,7 +349,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
        int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 
        if (error == 0) {
-               write_lock_irq(&mapping->tree_lock);
+               spin_lock_irq(&mapping->tree_lock);
                error = radix_tree_insert(&mapping->page_tree, offset, page);
                if (!error) {
                        page_cache_get(page);
@@ -388,7 +359,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
                        mapping->nrpages++;
                        pagecache_acct(1);
                }
-               write_unlock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
                radix_tree_preload_end();
        }
        return error;
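
add_to_page_cache() is the canonical radix_tree_preload() idiom: node allocations happen up front, with preemption disabled from then on, so the radix_tree_insert() executed under the IRQ-disabled tree_lock can never need memory. The same shape reduced to a skeleton (insert_item and the lock comments are illustrative):

    /* Skeleton of the preload idiom, kernel-tree C. */
    static int insert_item(struct radix_tree_root *root, unsigned long index,
                           void *item, unsigned int gfp_mask)
    {
            int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);

            if (error)
                    return error;           /* -ENOMEM, no locks held */
            /* take the spinlock protecting *root */
            error = radix_tree_insert(root, index, item);
            /* drop the spinlock */
            radix_tree_preload_end();       /* re-enables preemption */
            return error;
    }
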
@@ -501,11 +472,11 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page)
                page_cache_get(page);
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
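
find_get_page() takes its page reference while still holding tree_lock, which is what makes the returned pointer safe to use after the lock drops; the caller owns that reference and must release it. A minimal caller, sketched with a hypothetical helper name:

    /* Does the cache currently hold a page at this index? */
    static int page_is_cached(struct address_space *mapping, pgoff_t index)
    {
            struct page *page = find_get_page(mapping, index);

            if (!page)
                    return 0;
            page_cache_release(page);       /* drop the lookup reference */
            return 1;
    }
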
@@ -518,11 +489,11 @@ struct page *find_trylock_page(struct address_space *mapping, unsigned long offs
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page && TestSetPageLocked(page))
                page = NULL;
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
@@ -531,8 +502,8 @@ EXPORT_SYMBOL(find_trylock_page);
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  *
- * @mapping: the address_space to search
- * @offset: the page index
+ * @mapping - the address_space to search
+ * @offset - the page index
  *
  * Locates the desired pagecache page, locks it, increments its reference
  * count and returns its address.
@@ -544,15 +515,15 @@ struct page *find_lock_page(struct address_space *mapping,
 {
        struct page *page;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
 repeat:
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page) {
                page_cache_get(page);
                if (TestSetPageLocked(page)) {
-                       read_unlock_irq(&mapping->tree_lock);
+                       spin_unlock_irq(&mapping->tree_lock);
                        lock_page(page);
-                       read_lock_irq(&mapping->tree_lock);
+                       spin_lock_irq(&mapping->tree_lock);
 
                        /* Has the page been truncated while we slept? */
                        if (page->mapping != mapping || page->index != offset) {
@@ -562,7 +533,7 @@ repeat:
                        }
                }
        }
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return page;
 }
 
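
The subtle part of find_lock_page() survives the lock conversion unchanged: tree_lock must be dropped before sleeping in lock_page(), so the page can be truncated in that window, and mapping/index are re-verified once the lock is retaken. Any caller that sleeps on a looked-up page needs the same discipline; a hedged sketch (lookup_and_lock is hypothetical):

    /* Look up a page and lock it, re-validating after the sleep. */
    static struct page *lookup_and_lock(struct address_space *mapping,
                                        pgoff_t index)
    {
            struct page *page = find_get_page(mapping, index);

            if (!page)
                    return NULL;
            lock_page(page);                        /* may sleep */
            if (page->mapping != mapping || page->index != index) {
                    unlock_page(page);              /* truncated meanwhile */
                    page_cache_release(page);
                    return NULL;
            }
            return page;                            /* locked + referenced */
    }
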
@@ -571,9 +542,9 @@ EXPORT_SYMBOL(find_lock_page);
 /**
  * find_or_create_page - locate or add a pagecache page
  *
- * @mapping: the page's address_space
- * @index: the page's index into the mapping
- * @gfp_mask: page allocation mode
+ * @mapping - the page's address_space
+ * @index - the page's index into the mapping
+ * @gfp_mask - page allocation mode
  *
  * Locates a page in the pagecache.  If the page is not present, a new page
  * is allocated using @gfp_mask and is added to the pagecache and to the VM's
@@ -636,12 +607,12 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
        unsigned int i;
        unsigned int ret;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        ret = radix_tree_gang_lookup(&mapping->page_tree,
                                (void **)pages, start, nr_pages);
        for (i = 0; i < ret; i++)
                page_cache_get(pages[i]);
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return ret;
 }
 
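
find_get_pages() looks up to nr_pages pages in one tree_lock acquisition and returns each with a reference held. Typical batched-walk usage, sketched (walk_mapping is a hypothetical name):

    /* Visit every cached page of a mapping in batches of 16. */
    static void walk_mapping(struct address_space *mapping)
    {
            struct page *pages[16];
            pgoff_t next = 0;
            unsigned int i, nr;

            while ((nr = find_get_pages(mapping, next, 16, pages)) != 0) {
                    for (i = 0; i < nr; i++) {
                            next = pages[i]->index + 1;
                            /* ... per-page work goes here ... */
                            page_cache_release(pages[i]);
                    }
            }
    }
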
@@ -655,14 +626,14 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
        unsigned int i;
        unsigned int ret;
 
-       read_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
        ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
                                (void **)pages, *index, nr_pages, tag);
        for (i = 0; i < ret; i++)
                page_cache_get(pages[i]);
        if (ret)
                *index = pages[ret - 1]->index + 1;
-       read_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
        return ret;
 }
 
@@ -679,7 +650,7 @@ struct page *
 grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 {
        struct page *page = find_get_page(mapping, index);
-       unsigned int gfp_mask;
+       int gfp_mask;
 
        if (page) {
                if (!TestSetPageLocked(page))
@@ -718,12 +689,7 @@ void do_generic_mapping_read(struct address_space *mapping,
                             int nonblock)
 {
        struct inode *inode = mapping->host;
-       unsigned long index;
-       unsigned long end_index;
-       unsigned long offset;
-       unsigned long last_index;
-       unsigned long next_index;
-       unsigned long prev_index;
+       unsigned long index, end_index, offset;
        loff_t isize;
        struct page *cached_page;
        int error;
@@ -731,9 +697,6 @@ void do_generic_mapping_read(struct address_space *mapping,
 
        cached_page = NULL;
        index = *ppos >> PAGE_CACHE_SHIFT;
-       next_index = index;
-       prev_index = ra.prev_page;
-       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        isize = i_size_read(inode);
@@ -758,9 +721,7 @@ void do_generic_mapping_read(struct address_space *mapping,
                nr = nr - offset;
 
                cond_resched();
-               if (index == next_index)
-                       next_index = page_cache_readahead(mapping, &ra, filp,
-                                       index, last_index - index);
+               page_cache_readahead(mapping, &ra, filp, index);
 
 find_page:
                page = find_get_page(mapping, index);
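
The `-` side calls page_cache_readahead() only when index reaches the trigger point it returned previously (next_index), and passes how many pages the caller still wants (last_index - index); the `+` side pings it on every page with no size hint. A worked example of the `-` side's arithmetic, as a stand-alone program (PAGE_CACHE_SHIFT of 12, i.e. 4 KB pages, is an assumption for the example):

    #include <stdio.h>

    int main(void)
    {
            unsigned long ppos = 8192, count = 10000;
            unsigned long index = ppos >> 12;
            unsigned long last_index = (ppos + count + 4095) >> 12;

            /* prints: readahead: 3 pages wanted starting at index 2 */
            printf("readahead: %lu pages wanted starting at index %lu\n",
                   last_index - index, index);
            return 0;
    }
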
@@ -790,12 +751,10 @@ page_ok:
                        flush_dcache_page(page);
 
                /*
-                * When (part of) the same page is read multiple times
-                * in succession, only mark it as accessed the first time.
+                * Mark the page accessed if we read the beginning.
                 */
-               if (prev_index != index)
+               if (!offset)
                        mark_page_accessed(page);
-               prev_index = index;
 
                /*
                 * Ok, we have the page, and it's up-to-date, so
@@ -842,21 +801,11 @@ readpage:
                        goto readpage_error;
 
                if (!PageUptodate(page)) {
-                       lock_page(page);
+                       wait_on_page_locked(page);
                        if (!PageUptodate(page)) {
-                               if (page->mapping == NULL) {
-                                       /*
-                                        * invalidate_inode_pages got it
-                                        */
-                                       unlock_page(page);
-                                       page_cache_release(page);
-                                       goto find_page;
-                               }
-                               unlock_page(page);
                                error = -EIO;
                                goto readpage_error;
                        }
-                       unlock_page(page);
                }
 
                /*
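
The removed lock_page() dance closes a race: when a not-uptodate page is found, the newer code takes the page lock and re-checks, and a NULL page->mapping means invalidate_inode_pages() got there first, so the read retries instead of failing. Plain wait_on_page_locked() cannot tell "invalidated" apart from "I/O error". That recheck, factored into a sketch (wait_until_uptodate is hypothetical):

    /* Classify a page that was observed not uptodate. */
    static int wait_until_uptodate(struct address_space *mapping,
                                   struct page *page)
    {
            lock_page(page);
            if (PageUptodate(page)) {
                    unlock_page(page);
                    return 0;               /* I/O finished fine */
            }
            if (page->mapping == NULL) {
                    unlock_page(page);
                    return -EAGAIN;         /* invalidated: re-lookup */
            }
            unlock_page(page);
            return -EIO;                    /* genuine read failure */
    }
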
@@ -1015,7 +964,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                if (pos < size) {
                        retval = generic_file_direct_IO(READ, iocb,
                                                iov, pos, nr_segs);
-                       if (retval > 0 && !is_sync_kiocb(iocb))
+                       if (retval >= 0 && !is_sync_kiocb(iocb))
                                retval = -EIOCBQUEUED;
                        if (retval > 0)
                                *ppos = pos + retval;
@@ -1191,8 +1140,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_nopage(struct vm_area_struct *area,
-                               unsigned long address, int *type)
+struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int *type)
 {
        int error;
        struct file *file = area->vm_file;
@@ -1200,10 +1148,11 @@ struct page *filemap_nopage(struct vm_area_struct *area,
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
        struct page *page;
-       unsigned long size, pgoff;
+       unsigned long size, pgoff, endoff;
        int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
-       pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+       pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+       endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -1214,6 +1163,13 @@ retry_all:
        if (VM_RandomReadHint(area))
                goto no_cached_page;
 
+       /*
+        * The "size" of the file, as far as mmap is concerned, isn't bigger
+        * than the mapping
+        */
+       if (size > endoff)
+               size = endoff;
+
        /*
         * The readahead code wants to be told about each and every page
         * so it can build and shrink its windows appropriately
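
The added clamp caps the apparent file size at the end of the VMA, so a fault can never instantiate cache pages beyond what this particular mapping can address. The arithmetic, modeled as a stand-alone program (4 KB pages assumed; the sizes are illustrative):

    #include <stdio.h>

    /* A 16 KB file (4 pages) mapped through a 2-page window whose
     * vm_pgoff is 1: only file pages 1 and 2 are reachable. */
    int main(void)
    {
            unsigned long vm_pgoff = 1;
            unsigned long vma_pages = 2;    /* (vm_end - vm_start) >> 12 */
            unsigned long size = (16384 + 4095) >> 12;      /* 4 */
            unsigned long endoff = vma_pages + vm_pgoff;    /* 3 */

            if (size > endoff)
                    size = endoff;
            /* prints: faults allowed for pgoff < 3 */
            printf("faults allowed for pgoff < %lu\n", size);
            return 0;
    }
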
@@ -1221,7 +1177,7 @@ retry_all:
         * For sequential accesses, we use the generic readahead logic.
         */
        if (VM_SequentialReadHint(area))
-               page_cache_readahead(mapping, ra, file, pgoff, 1);
+               page_cache_readahead(mapping, ra, file, pgoff);
 
        /*
         * Do we have something in the page cache already?
@@ -1403,13 +1359,8 @@ retry_find:
         * Ok, found a page in the page cache, now we need to check
         * that it's up-to-date.
         */
-       if (!PageUptodate(page)) {
-               if (nonblock) {
-                       page_cache_release(page);
-                       return NULL;
-               }
+       if (!PageUptodate(page))
                goto page_not_uptodate;
-       }
 
 success:
        /*
@@ -1493,9 +1444,12 @@ err:
        return NULL;
 }
 
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-               unsigned long len, pgprot_t prot, unsigned long pgoff,
-               int nonblock)
+static int filemap_populate(struct vm_area_struct *vma,
+                       unsigned long addr,
+                       unsigned long len,
+                       pgprot_t prot,
+                       unsigned long pgoff,
+                       int nonblock)
 {
        struct file *file = vma->vm_file;
        struct address_space *mapping = file->f_mapping;
@@ -1555,7 +1509,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
        vma->vm_ops = &generic_file_vm_ops;
        return 0;
 }
-EXPORT_SYMBOL(filemap_populate);
 
 /*
  * This is for filesystems which do not implement ->writepage.
@@ -1966,20 +1919,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 
        pagevec_init(&lru_pvec, 0);
 
-       /*
-        * handle partial DIO write.  Adjust cur_iov if needed.
-        */
-       if (likely(nr_segs == 1))
-               buf = iov->iov_base + written;
-       else {
-               filemap_set_next_iovec(&cur_iov, &iov_base, written);
-               buf = cur_iov->iov_base + iov_base;
-       }
-
+       buf = iov->iov_base + written;  /* handle partial DIO write */
        do {
                unsigned long index;
                unsigned long offset;
-               unsigned long maxlen;
                size_t copied;
 
                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
@@ -1994,10 +1937,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                 * same page as we're writing to, without it being marked
                 * up-to-date.
                 */
-               maxlen = cur_iov->iov_len - iov_base;
-               if (maxlen > bytes)
-                       maxlen = bytes;
-               fault_in_pages_readable(buf, maxlen);
+               fault_in_pages_readable(buf, bytes);
 
                page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
                if (!page) {
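
The `-` lines clamp the prefault length to what is left of the current iovec segment: with nr_segs > 1, buf + bytes may run past the end of cur_iov, so touching a full `bytes` (as the `+` side does) is only safe in the single-segment case. The clamp as an isolated helper, sketched with the surrounding function's variable names:

    /* Prefault at most the remainder of the current segment. */
    static void prefault_current_segment(const struct iovec *cur_iov,
                                         size_t iov_base, size_t bytes,
                                         const char __user *buf)
    {
            unsigned long maxlen = cur_iov->iov_len - iov_base;

            if (maxlen > bytes)
                    maxlen = bytes;
            fault_in_pages_readable(buf, maxlen);
    }
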
@@ -2035,13 +1975,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                                count -= status;
                                pos += status;
                                buf += status;
-                               if (unlikely(nr_segs > 1)) {
+                               if (unlikely(nr_segs > 1))
                                        filemap_set_next_iovec(&cur_iov,
                                                        &iov_base, status);
-                                       buf = cur_iov->iov_base + iov_base;
-                               } else {
-                                       iov_base += status;
-                               }
                        }
                }
                if (unlikely(copied != bytes))
@@ -2121,8 +2057,6 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
        count = ocount;
        pos = *ppos;
 
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
        written = 0;
@@ -2226,7 +2160,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
        BUG_ON(iocb->ki_pos != pos);
 
        down(&inode->i_sem);
-       ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
+       ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
                                                &iocb->ki_pos);
        up(&inode->i_sem);
 
@@ -2302,8 +2236,7 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
 EXPORT_SYMBOL(generic_file_writev);
 
 /*
- * Called under i_sem for writes to S_ISREG files.   Returns -EIO if something
- * went wrong during pagecache shootdown.
+ * Called under i_sem for writes to S_ISREG files
  */
 ssize_t
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
@@ -2312,31 +2245,13 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        ssize_t retval;
-       size_t write_len = 0;
-
-       /*
-        * If it's a write, unmap all mmappings of the file up-front.  This
-        * will cause any pte dirty bits to be propagated into the pageframes
-        * for the subsequent filemap_write_and_wait().
-        */
-       if (rw == WRITE) {
-               write_len = iov_length(iov, nr_segs);
-               if (mapping_mapped(mapping))
-                       unmap_mapping_range(mapping, offset, write_len, 0);
-       }
 
        retval = filemap_write_and_wait(mapping);
        if (retval == 0) {
                retval = mapping->a_ops->direct_IO(rw, iocb, iov,
                                                offset, nr_segs);
-               if (rw == WRITE && mapping->nrpages) {
-                       pgoff_t end = (offset + write_len - 1)
-                                               >> PAGE_CACHE_SHIFT;
-                       int err = invalidate_inode_pages2_range(mapping,
-                                       offset >> PAGE_CACHE_SHIFT, end);
-                       if (err)
-                               retval = err;
-               }
+               if (rw == WRITE && mapping->nrpages)
+                       invalidate_inode_pages2(mapping);
        }
        return retval;
 }
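
The removed logic is the careful form of direct-IO write coherency: unmap the file's user mappings up front so pte dirty bits reach the page frames before filemap_write_and_wait(), then, after the write, invalidate only the page range the write overlapped and propagate any failure. The surviving `+` line instead drops every cached page of the mapping and ignores the result. The range computation, modeled as a stand-alone program (4 KB pages assumed):

    #include <stdio.h>

    /* Which page indices does a direct write of write_len bytes at
     * byte position offset overlap? */
    int main(void)
    {
            unsigned long long offset = 6000, write_len = 10000;
            unsigned long start = offset >> 12;                     /* 1 */
            unsigned long end = (offset + write_len - 1) >> 12;     /* 3 */

            /* prints: invalidate pages 1..3 inclusive */
            printf("invalidate pages %lu..%lu inclusive\n", start, end);
            return 0;
    }
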