VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / mm / filemap.c
index 6e59faa..ab85dcb 100644 (file)
@@ -60,6 +60,7 @@
  *      ->swap_list_lock
  *        ->swap_device_lock   (exclusive_swap_page, others)
  *          ->mapping->tree_lock
+ *    ->page_map_lock()                (try_to_unmap_file)
  *
  *  ->i_sem
  *    ->i_mmap_lock            (truncate->unmap_mapping_range)
  *    ->private_lock           (try_to_unmap_one)
  *    ->tree_lock              (try_to_unmap_one)
  *    ->zone.lru_lock          (follow_page->mark_page_accessed)
+ *    ->page_map_lock()                (page_add_anon_rmap)
+ *      ->tree_lock            (page_remove_rmap->set_page_dirty)
+ *      ->private_lock         (page_remove_rmap->set_page_dirty)
+ *      ->inode_lock           (page_remove_rmap->set_page_dirty)
+ *    ->anon_vma.lock          (anon_vma_prepare)
+ *    ->inode_lock             (zap_pte_range->set_page_dirty)
+ *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
  *
  *  ->task->proc_lock
  *    ->dcache_lock            (proc_pid_lookup)
@@ -192,7 +200,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping,
        index = start;
        while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                        PAGECACHE_TAG_WRITEBACK,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
                unsigned i;
 
                for (i = 0; i < nr_pages; i++) {
@@ -432,10 +440,6 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
 {
        struct page *page;
 
-       /*
-        * We scan the hash list read-only. Addition to and removal from
-        * the hash-list needs a held write-lock.
-        */
        spin_lock_irq(&mapping->tree_lock);
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page)
@@ -650,7 +654,8 @@ void do_generic_mapping_read(struct address_space *mapping,
                             read_actor_t actor)
 {
        struct inode *inode = mapping->host;
-       unsigned long index, offset;
+       unsigned long index, end_index, offset;
+       loff_t isize;
        struct page *cached_page;
        int error;
        struct file_ra_state ra = *_ra;
@@ -659,26 +664,18 @@ void do_generic_mapping_read(struct address_space *mapping,
        index = *ppos >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
+       isize = i_size_read(inode);
+       end_index = isize >> PAGE_CACHE_SHIFT;
+       if (index > end_index)
+               goto out;
+
        for (;;) {
                struct page *page;
-               unsigned long end_index, nr, ret;
-               loff_t isize = i_size_read(inode);
-
-               end_index = isize >> PAGE_CACHE_SHIFT;
-                       
-               if (index > end_index)
-                       break;
-               nr = PAGE_CACHE_SIZE;
-               if (index == end_index) {
-                       nr = isize & ~PAGE_CACHE_MASK;
-                       if (nr <= offset)
-                               break;
-               }
+               unsigned long nr, ret;
 
                cond_resched();
                page_cache_readahead(mapping, &ra, filp, index);
 
-               nr = nr - offset;
 find_page:
                page = find_get_page(mapping, index);
                if (unlikely(page == NULL)) {
@@ -688,6 +685,17 @@ find_page:
                if (!PageUptodate(page))
                        goto page_not_up_to_date;
 page_ok:
+               /* nr is the maximum number of bytes to copy from this page */
+               nr = PAGE_CACHE_SIZE;
+               if (index == end_index) {
+                       nr = isize & ~PAGE_CACHE_MASK;
+                       if (nr <= offset) {
+                               page_cache_release(page);
+                               goto out;
+                       }
+               }
+               nr = nr - offset;
+
                /* If users can be writing to this page using arbitrary
                 * virtual addresses, take care about potential aliasing
                 * before reading the page on the kernel side.
@@ -719,7 +727,7 @@ page_ok:
                page_cache_release(page);
                if (ret == nr && desc->count)
                        continue;
-               break;
+               goto out;
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
@@ -739,22 +747,41 @@ page_not_up_to_date:
                }
 
 readpage:
-               /* ... and start the actual read. The read will unlock the page. */
+               /* Start the actual read. The read will unlock the page. */
                error = mapping->a_ops->readpage(filp, page);
 
-               if (!error) {
-                       if (PageUptodate(page))
-                               goto page_ok;
+               if (unlikely(error))
+                       goto readpage_error;
+
+               if (!PageUptodate(page)) {
                        wait_on_page_locked(page);
-                       if (PageUptodate(page))
-                               goto page_ok;
-                       error = -EIO;
+                       if (!PageUptodate(page)) {
+                               error = -EIO;
+                               goto readpage_error;
+                       }
                }
 
+               /*
+                * i_size must be checked after we have done ->readpage.
+                *
+                * Checking i_size after the readpage allows us to calculate
+                * the correct value for "nr", which means the zero-filled
+                * part of the page is not copied back to userspace (unless
+                * another truncate extends the file - this is desired though).
+                */
+               isize = i_size_read(inode);
+               end_index = isize >> PAGE_CACHE_SHIFT;
+               if (index > end_index) {
+                       page_cache_release(page);
+                       goto out;
+               }
+               goto page_ok;
+
+readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
                desc->error = error;
                page_cache_release(page);
-               break;
+               goto out;
 
 no_cached_page:
                /*
@@ -765,7 +792,7 @@ no_cached_page:
                        cached_page = page_cache_alloc_cold(mapping);
                        if (!cached_page) {
                                desc->error = -ENOMEM;
-                               break;
+                               goto out;
                        }
                }
                error = add_to_page_cache_lru(cached_page, mapping,
@@ -774,13 +801,14 @@ no_cached_page:
                        if (error == -EEXIST)
                                goto find_page;
                        desc->error = error;
-                       break;
+                       goto out;
                }
                page = cached_page;
                cached_page = NULL;
                goto readpage;
        }
 
+out:
        *_ra = ra;
 
        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
@@ -804,9 +832,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
         * Faults on the destination of a read are common, so do it before
         * taking the kmap.
         */
-       if (!fault_in_pages_writeable(desc->buf, size)) {
+       if (!fault_in_pages_writeable(desc->arg.buf, size)) {
                kaddr = kmap_atomic(page, KM_USER0);
-               left = __copy_to_user(desc->buf, kaddr + offset, size);
+               left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
                kunmap_atomic(kaddr, KM_USER0);
                if (left == 0)
                        goto success;
@@ -814,7 +842,7 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
 
        /* Do it the slow way */
        kaddr = kmap(page);
-       left = __copy_to_user(desc->buf, kaddr + offset, size);
+       left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
        kunmap(page);
 
        if (left) {
@@ -824,7 +852,7 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
 success:
        desc->count = count - size;
        desc->written += size;
-       desc->buf += size;
+       desc->arg.buf += size;
        return size;
 }
 
@@ -891,7 +919,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                        read_descriptor_t desc;
 
                        desc.written = 0;
-                       desc.buf = iov[seg].iov_base;
+                       desc.arg.buf = iov[seg].iov_base;
                        desc.count = iov[seg].iov_len;
                        if (desc.count == 0)
                                continue;
@@ -941,7 +969,7 @@ int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long o
 {
        ssize_t written;
        unsigned long count = desc->count;
-       struct file *file = (struct file *) desc->buf;
+       struct file *file = desc->arg.data;
 
        if (size > count)
                size = count;
@@ -958,7 +986,7 @@ int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long o
 }
 
 ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void __user *target)
+                        size_t count, read_actor_t actor, void *target)
 {
        read_descriptor_t desc;
 
@@ -967,7 +995,7 @@ ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
 
        desc.written = 0;
        desc.count = count;
-       desc.buf = target;
+       desc.arg.data = target;
        desc.error = 0;
 
        do_generic_file_read(in_file, ppos, &desc, actor);
@@ -1123,12 +1151,11 @@ retry_find:
                did_readaround = 1;
                ra_pages = max_sane_readahead(file->f_ra.ra_pages);
                if (ra_pages) {
-                       long start;
+                       pgoff_t start = 0;
 
-                       start = pgoff - ra_pages / 2;
-                       if (pgoff < 0)
-                               pgoff = 0;
-                       do_page_cache_readahead(mapping, file, pgoff, ra_pages);
+                       if (pgoff > ra_pages / 2)
+                               start = pgoff - ra_pages / 2;
+                       do_page_cache_readahead(mapping, file, start, ra_pages);
                }
                page = find_get_page(mapping, pgoff);
                if (!page)
@@ -1390,15 +1417,9 @@ repeat:
                        return err;
                }
        } else {
-               /*
-                * If a nonlinear mapping then store the file page offset
-                * in the pte.
-                */
-               if (pgoff != linear_page_index(vma, addr)) {
-                       err = install_file_pte(mm, vma, addr, pgoff, prot);
-                       if (err)
-                               return err;
-               }
+               err = install_file_pte(mm, vma, addr, pgoff, prot);
+               if (err)
+                       return err;
        }
 
        len -= PAGE_SIZE;
@@ -1891,7 +1912,7 @@ generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
                count -= written;
        }
 
-       buf = iov->iov_base;
+       buf = iov->iov_base + written;  /* handle partial DIO write */
        do {
                unsigned long index;
                unsigned long offset;
@@ -1989,7 +2010,7 @@ out_status:
        err = written ? written : status;
 out:
        pagevec_lru_add(&lru_pvec);
-       current->backing_dev_info = 0;
+       current->backing_dev_info = NULL;
        return err;
 }