patch-2_6_7-vs1_9_1_12
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 3dd9163..fe8332e 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
 #include <linux/pagemap.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
-#include <linux/bio.h>
 #include <linux/buffer_head.h>         /* for invalidate_bdev() */
 #include <linux/backing-dev.h>
 #include <linux/blkpg.h>
+#include <linux/writeback.h>
+
 #include <asm/uaccess.h>
 
 /* The RAM disk size is now a parameter */
@@ -94,16 +95,46 @@ int rd_blocksize = BLOCK_SIZE;                      /* blocksize of the RAM disks */
  *               2000 Transmeta Corp.
  * aops copied from ramfs.
  */
-static int ramdisk_readpage(struct file *file, struct page *page)
-{
-       if (!PageUptodate(page)) {
-               void *kaddr = kmap_atomic(page, KM_USER0);
 
-               memset(kaddr, 0, PAGE_CACHE_SIZE);
-               flush_dcache_page(page);
-               kunmap_atomic(kaddr, KM_USER0);
-               SetPageUptodate(page);
+/*
+ * If a ramdisk page has buffers, some may be uptodate and some may not be.
+ * To bring the page uptodate we zero out the non-uptodate buffers.  The
+ * page must be locked.
+ */
+static void make_page_uptodate(struct page *page)
+{
+       if (page_has_buffers(page)) {
+               struct buffer_head *bh = page_buffers(page);
+               struct buffer_head *head = bh;
+
+               do {
+                       if (!buffer_uptodate(bh)) {
+                               memset(bh->b_data, 0, bh->b_size);
+                               /*
+                                * akpm: I'm totally undecided about this.  The
+                                * buffer has just been magically brought "up to
+                                * date", but nobody should want to be reading
+                                * it anyway, because it hasn't been used for
+                                * anything yet.  It is still in a "not read
+                                * from disk yet" state.
+                                *
+                                * But non-uptodate buffers against an uptodate
+                                * page are against the rules.  So do it anyway.
+                                */
+                                set_buffer_uptodate(bh);
+                       }
+               } while ((bh = bh->b_this_page) != head);
+       } else {
+               memset(page_address(page), 0, PAGE_CACHE_SIZE);
        }
+       flush_dcache_page(page);
+       SetPageUptodate(page);
+}
+
+static int ramdisk_readpage(struct file *file, struct page *page)
+{
+       if (!PageUptodate(page))
+               make_page_uptodate(page);
        unlock_page(page);
        return 0;
 }
@@ -111,34 +142,69 @@ static int ramdisk_readpage(struct file *file, struct page *page)
 static int ramdisk_prepare_write(struct file *file, struct page *page,
                                unsigned offset, unsigned to)
 {
-       if (!PageUptodate(page)) {
-               void *kaddr = kmap_atomic(page, KM_USER0);
-
-               memset(kaddr, 0, PAGE_CACHE_SIZE);
-               flush_dcache_page(page);
-               kunmap_atomic(kaddr, KM_USER0);
-               SetPageUptodate(page);
-       }
-       SetPageDirty(page);
+       if (!PageUptodate(page))
+               make_page_uptodate(page);
        return 0;
 }
 
 static int ramdisk_commit_write(struct file *file, struct page *page,
                                unsigned offset, unsigned to)
 {
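+       /*
+        * set_page_dirty() goes through the mapping's ->set_page_dirty(),
+        * i.e. ramdisk_set_page_dirty() below, so the page is dirtied without
+        * the normal dirty-memory accounting.
+        */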
+       set_page_dirty(page);
+       return 0;
+}
+
+/*
+ * ->writepage to the blockdev's mapping has to redirty the page so that the
+ * VM doesn't go and steal it.  We return WRITEPAGE_ACTIVATE so that the VM
+ * won't try to (pointlessly) write the page again for a while.
+ *
+ * Really, these pages should not be on the LRU at all.
+ */
+static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
+{
+       if (!PageUptodate(page))
+               make_page_uptodate(page);
+       SetPageDirty(page);
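+       /*
+        * For reclaim, return with the page still locked: the VM unlocks it
+        * and moves it back onto the active list.
+        */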
+       if (wbc->for_reclaim)
+               return WRITEPAGE_ACTIVATE;
+       unlock_page(page);
+       return 0;
+}
+
+/*
+ * This is a little speedup thing: short-circuit attempts to write back the
+ * ramdisk blockdev inode to its non-existent backing store.
+ */
+static int ramdisk_writepages(struct address_space *mapping,
+                               struct writeback_control *wbc)
+{
+       return 0;
+}
+
+/*
+ * ramdisk blockdev pages have their own ->set_page_dirty() because we don't
+ * want them to contribute to dirty memory accounting.
+ */
+static int ramdisk_set_page_dirty(struct page *page)
+{
+       SetPageDirty(page);
        return 0;
 }
 
 static struct address_space_operations ramdisk_aops = {
-       .readpage = ramdisk_readpage,
-       .prepare_write = ramdisk_prepare_write,
-       .commit_write = ramdisk_commit_write,
+       .readpage       = ramdisk_readpage,
+       .prepare_write  = ramdisk_prepare_write,
+       .commit_write   = ramdisk_commit_write,
+       .writepage      = ramdisk_writepage,
+       .set_page_dirty = ramdisk_set_page_dirty,
+       .writepages     = ramdisk_writepages,
 };
 
 static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector,
                                struct address_space *mapping)
 {
-       unsigned long index = sector >> (PAGE_CACHE_SHIFT - 9);
+       pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9);
        unsigned int vec_offset = vec->bv_offset;
        int offset = (sector << 9) & ~PAGE_CACHE_MASK;
        int size = vec->bv_len;
@@ -146,60 +212,47 @@ static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector,
 
        do {
                int count;
-               struct page * page;
-               char * src, * dst;
-               int unlock = 0;
+               struct page *page;
+               char *src;
+               char *dst;
 
                count = PAGE_CACHE_SIZE - offset;
                if (count > size)
                        count = size;
                size -= count;
 
-               page = find_get_page(mapping, index);
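+               /*
+                * grab_cache_page() creates the page if it isn't already
+                * present and returns it locked, with a reference held; both
+                * are dropped via unlock_page()/put_page() at the end of the
+                * loop.
+                */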
+               page = grab_cache_page(mapping, index);
                if (!page) {
-                       page = grab_cache_page(mapping, index);
                        err = -ENOMEM;
-                       if (!page)
-                               goto out;
-                       err = 0;
-
-                       if (!PageUptodate(page)) {
-                               void *kaddr = kmap_atomic(page, KM_USER0);
-
-                               memset(kaddr, 0, PAGE_CACHE_SIZE);
-                               flush_dcache_page(page);
-                               kunmap_atomic(kaddr, KM_USER0);
-                               SetPageUptodate(page);
-                       }
-
-                       unlock = 1;
+                       goto out;
                }
 
+               if (!PageUptodate(page))
+                       make_page_uptodate(page);
+
                index++;
 
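+               /*
+                * KM_USER0 and KM_USER1 are distinct atomic kmap slots, so
+                * the ramdisk page and the bio page can be mapped at the same
+                * time.
+                */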
                if (rw == READ) {
-                       src = kmap(page) + offset;
-                       dst = kmap(vec->bv_page) + vec_offset;
+                       src = kmap_atomic(page, KM_USER0) + offset;
+                       dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset;
                } else {
-                       dst = kmap(page) + offset;
-                       src = kmap(vec->bv_page) + vec_offset;
+                       src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset;
+                       dst = kmap_atomic(page, KM_USER1) + offset;
                }
                offset = 0;
                vec_offset += count;
 
                memcpy(dst, src, count);
 
-               kunmap(page);
-               kunmap(vec->bv_page);
+               kunmap_atomic(src, KM_USER0);
+               kunmap_atomic(dst, KM_USER1);
 
-               if (rw == READ) {
+               if (rw == READ)
                        flush_dcache_page(vec->bv_page);
-               } else {
-                       SetPageDirty(page);
-               }
-               if (unlock)
-                       unlock_page(page);
-               __free_page(page);
+               else
+                       set_page_dirty(page);
+               unlock_page(page);
+               put_page(page);
        } while (size);
 
  out:
@@ -251,7 +304,7 @@ static int rd_ioctl(struct inode *inode, struct file *file,
        struct block_device *bdev = inode->i_bdev;
 
        if (cmd != BLKFLSBUF)
-               return -EINVAL;
+               return -ENOTTY;
 
        /*
         * special: we want to release the ramdisk memory, it's not like with
@@ -268,28 +321,65 @@ static int rd_ioctl(struct inode *inode, struct file *file,
        return error;
 }
 
+/*
+ * This is the backing_dev_info for the blockdev inode itself.  It doesn't need
+ * writeback and it does not contribute to dirty memory accounting.
+ */
 static struct backing_dev_info rd_backing_dev_info = {
        .ra_pages       = 0,    /* No readahead */
        .memory_backed  = 1,    /* Does not contribute to dirty memory */
-       .unplug_io_fn = default_unplug_io_fn,
+       .unplug_io_fn   = default_unplug_io_fn,
+};
+
+/*
+ * This is the backing_dev_info for the files which live atop the ramdisk
+ * "device".  These files do need writeback and they do contribute to dirty
+ * memory accounting.
+ */
+static struct backing_dev_info rd_file_backing_dev_info = {
+       .ra_pages       = 0,    /* No readahead */
+       .memory_backed  = 0,    /* Does contribute to dirty memory */
+       .unplug_io_fn   = default_unplug_io_fn,
 };
 
 static int rd_open(struct inode *inode, struct file *filp)
 {
        unsigned unit = iminor(inode);
 
-       /*
-        * Immunize device against invalidate_buffers() and prune_icache().
-        */
        if (rd_bdev[unit] == NULL) {
                struct block_device *bdev = inode->i_bdev;
+               struct address_space *mapping;
+               int gfp_mask;
+
                inode = igrab(bdev->bd_inode);
                rd_bdev[unit] = bdev;
                bdev->bd_openers++;
                bdev->bd_block_size = rd_blocksize;
                inode->i_size = get_capacity(rd_disks[unit])<<9;
-               inode->i_mapping->a_ops = &ramdisk_aops;
-               inode->i_mapping->backing_dev_info = &rd_backing_dev_info;
+               mapping = inode->i_mapping;
+               mapping->a_ops = &ramdisk_aops;
+               mapping->backing_dev_info = &rd_backing_dev_info;
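+               /*
+                * Inodes of a filesystem mounted on this ramdisk pick up
+                * rd_file_backing_dev_info for their mappings, so their dirty
+                * pages are written back and accounted normally.
+                */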
+               bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info;
+
+               /*
+                * Deep badness.  rd_blkdev_pagecache_IO() needs to allocate
+                * pagecache pages within a request_fn.  We cannot recur back
+                * into the filesystem which is mounted atop the ramdisk, because
+                * that would deadlock on fs locks.  And we really don't want
+                * to reenter rd_blkdev_pagecache_IO when we're already within
+                * that function.
+                *
+                * So we turn off __GFP_FS and __GFP_IO.
+                *
+                * And to give this thing a hope of working, turn on __GFP_HIGH.
+                * Hopefully, there's enough regular memory allocation going on
+                * for the page allocator emergency pools to keep the ramdisk
+                * driver happy.
+                */
+               gfp_mask = mapping_gfp_mask(mapping);
+               gfp_mask &= ~(__GFP_FS|__GFP_IO);
+               gfp_mask |= __GFP_HIGH;
+               mapping_set_gfp_mask(mapping, gfp_mask);
        }
 
        return 0;