#include <asm/atomic.h>
#include <linux/bio.h>
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
-#include <linux/bio.h>
#include <linux/buffer_head.h> /* for invalidate_bdev() */
#include <linux/backing-dev.h>
#include <linux/blkpg.h>
-#include <asm/uaccess.h>
+#include <linux/writeback.h>
-/* The RAM disk size is now a parameter */
-#define NUM_RAMDISKS 16 /* This cannot be overridden (yet) */
+#include <asm/uaccess.h>
/* Various static variables go here. Most are used only in the RAM disk code.
*/
-static struct gendisk *rd_disks[NUM_RAMDISKS];
-static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */
-static struct request_queue *rd_queue[NUM_RAMDISKS];
+/* Per-minor driver state; the array size now comes from Kconfig
+ * (CONFIG_BLK_DEV_RAM_COUNT) instead of the removed NUM_RAMDISKS #define. */
+static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT];
+static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */
+static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT];
/*
* Parameters for the boot-loading of the RAM disk. These are set by
* behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that
* supposes the filesystem in the image uses a BLOCK_SIZE blocksize).
*/
+/* Now static: no user outside this file, and it remains settable as a
+ * module parameter (see module_param(rd_blocksize, ...) at the bottom). */
-int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */
+static int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */
/*
* Copyright (C) 2000 Linus Torvalds.
* 2000 Transmeta Corp.
* aops copied from ramfs.
*/
-static int ramdisk_readpage(struct file *file, struct page *page)
-{
- if (!PageUptodate(page)) {
- void *kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- SetPageUptodate(page);
+/*
+ * If a ramdisk page has buffers, some may be uptodate and some may be not.
+ * To bring the page uptodate we zero out the non-uptodate buffers. The
+ * page must be locked.
+ */
+static void make_page_uptodate(struct page *page)
+{
+ if (page_has_buffers(page)) {
+ struct buffer_head *bh = page_buffers(page);
+ struct buffer_head *head = bh;
+
+ do {
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ /*
+ * akpm: I'm totally undecided about this. The
+ * buffer has just been magically brought "up to
+ * date", but nobody should want to be reading
+ * it anyway, because it hasn't been used for
+ * anything yet. It is still in a "not read
+ * from disk yet" state.
+ *
+ * But non-uptodate buffers against an uptodate
+ * page are against the rules. So do it anyway.
+ */
+ set_buffer_uptodate(bh);
+ }
+ } while ((bh = bh->b_this_page) != head);
+ } else {
+ memset(page_address(page), 0, PAGE_CACHE_SIZE);
}
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+}
+
+/* ->readpage: a ramdisk page never written before reads back as zeroes;
+ * the page is filled via the buffer-aware helper above instead of the old
+ * unconditional whole-page memset. */
+static int ramdisk_readpage(struct file *file, struct page *page)
+{
+ if (!PageUptodate(page))
+ make_page_uptodate(page);
unlock_page(page);
return 0;
}
+/* ->prepare_write: just make sure the whole page is uptodate. The
+ * unconditional SetPageDirty() moves to commit_write (now set_page_dirty),
+ * so pages are only dirtied after data is actually copied in. */
static int ramdisk_prepare_write(struct file *file, struct page *page,
unsigned offset, unsigned to)
{
- if (!PageUptodate(page)) {
- void *kaddr = kmap_atomic(page, KM_USER0);
-
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- SetPageUptodate(page);
- }
- SetPageDirty(page);
+ if (!PageUptodate(page))
+ make_page_uptodate(page);
return 0;
}
+/* ->commit_write: data has been copied into the page; mark it dirty via
+ * set_page_dirty() so the aops' ->set_page_dirty hook below is honoured. */
static int ramdisk_commit_write(struct file *file, struct page *page,
unsigned offset, unsigned to)
{
+ set_page_dirty(page);
+ return 0;
+}
+
+/*
+ * ->writepage to the blockdev's mapping has to redirty the page so that the
+ * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM
+ * won't try to (pointlessly) write the page again for a while.
+ *
+ * Really, these pages should not be on the LRU at all.
+ */
+static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
+{
+ if (!PageUptodate(page))
+ make_page_uptodate(page);
+ SetPageDirty(page);
+ if (wbc->for_reclaim)
+ return AOP_WRITEPAGE_ACTIVATE;
+ unlock_page(page);
+ return 0;
+}
+
+/*
+ * This is a little speedup thing: short-circuit attempts to write back the
+ * ramdisk blockdev inode to its non-existent backing store.
+ */
+static int ramdisk_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return 0;
+}
+
+/*
+ * ramdisk blockdev pages have their own ->set_page_dirty() because we don't
+ * want them to contribute to dirty memory accounting.
+ */
+static int ramdisk_set_page_dirty(struct page *page)
+{
+ /* 1 => the page was newly dirtied (clean->dirty transition); on purpose
+ * no dirty-memory accounting is performed here. */
+ if (!TestSetPageDirty(page))
+ return 1;
return 0;
}
+/* Pagecache operations for the ramdisk blockdev mapping; the three new
+ * hooks (writepage/set_page_dirty/writepages) keep ramdisk pages pinned in
+ * pagecache and out of dirty accounting. */
static struct address_space_operations ramdisk_aops = {
- .readpage = ramdisk_readpage,
- .prepare_write = ramdisk_prepare_write,
- .commit_write = ramdisk_commit_write,
+ .readpage = ramdisk_readpage,
+ .prepare_write = ramdisk_prepare_write,
+ .commit_write = ramdisk_commit_write,
+ .writepage = ramdisk_writepage,
+ .set_page_dirty = ramdisk_set_page_dirty,
+ .writepages = ramdisk_writepages,
};
+/*
+ * Copy one bio_vec segment to/from the ramdisk's pagecache, one page at a
+ * time. NOTE(review): the declaration of 'err' and the function's normal
+ * exit path live in patch context elided from this hunk.
+ */
static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector,
struct address_space *mapping)
{
- unsigned long index = sector >> (PAGE_CACHE_SHIFT - 9);
+ pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9);
unsigned int vec_offset = vec->bv_offset;
int offset = (sector << 9) & ~PAGE_CACHE_MASK;
int size = vec->bv_len;
do {
int count;
- struct page * page;
- char * src, * dst;
- int unlock = 0;
+ struct page *page;
+ char *src;
+ char *dst;
count = PAGE_CACHE_SIZE - offset;
if (count > size)
count = size;
size -= count;
+ /* grab_cache_page() finds-or-creates the page and returns it
+ * locked, replacing the old find_get_page()/grab fallback. */
- page = find_get_page(mapping, index);
+ page = grab_cache_page(mapping, index);
if (!page) {
- page = grab_cache_page(mapping, index);
err = -ENOMEM;
- if (!page)
- goto out;
- err = 0;
-
- if (!PageUptodate(page)) {
- void *kaddr = kmap_atomic(page, KM_USER0);
-
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- SetPageUptodate(page);
- }
-
- unlock = 1;
+ goto out;
}
+ if (!PageUptodate(page))
+ make_page_uptodate(page);
+
index++;
+ /* Fixed atomic-kmap slots: src is always KM_USER0 and dst is
+ * always KM_USER1 regardless of direction, so the kunmaps below
+ * can be unconditional. */
if (rw == READ) {
- src = kmap(page) + offset;
- dst = kmap(vec->bv_page) + vec_offset;
+ src = kmap_atomic(page, KM_USER0) + offset;
+ dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset;
} else {
- dst = kmap(page) + offset;
- src = kmap(vec->bv_page) + vec_offset;
+ src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset;
+ dst = kmap_atomic(page, KM_USER1) + offset;
}
offset = 0;
vec_offset += count;
memcpy(dst, src, count);
- kunmap(page);
- kunmap(vec->bv_page);
+ kunmap_atomic(src, KM_USER0);
+ kunmap_atomic(dst, KM_USER1);
- if (rw == READ) {
+ if (rw == READ)
flush_dcache_page(vec->bv_page);
- } else {
- SetPageDirty(page);
- }
- if (unlock)
- unlock_page(page);
- __free_page(page);
+ else
+ set_page_dirty(page);
+ unlock_page(page);
+ put_page(page);
} while (size);
out:
+/* NOTE(review): patch context elided here — the lines below belong to the
+ * ioctl handler (BLKFLSBUF support); its signature is not in this hunk. */
struct block_device *bdev = inode->i_bdev;
if (cmd != BLKFLSBUF)
+ /* -ENOTTY is the conventional "unrecognised ioctl" errno */
- return -EINVAL;
+ return -ENOTTY;
/*
* special: we want to release the ramdisk memory, it's not like with
* cache
*/
error = -EBUSY;
- down(&bdev->bd_sem);
+ mutex_lock(&bdev->bd_mutex);
if (bdev->bd_openers <= 2) {
truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
error = 0;
}
- up(&bdev->bd_sem);
+ mutex_unlock(&bdev->bd_mutex);
return error;
}
+/*
+ * This is the backing_dev_info for the blockdev inode itself. It doesn't need
+ * writeback and it does not contribute to dirty memory accounting.
+ */
static struct backing_dev_info rd_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .memory_backed = 1, /* Does not contribute to dirty memory */
- .unplug_io_fn = default_unplug_io_fn,
+ /* NOTE(review): BDI_CAP_MAP_COPY presumably permits private (COW) mmaps
+ * of this device — confirm against linux/backing-dev.h. */
+ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY,
+ .unplug_io_fn = default_unplug_io_fn,
+};
+
+/*
+ * This is the backing_dev_info for the files which live atop the ramdisk
+ * "device". These files do need writeback and they do contribute to dirty
+ * memory accounting.
+ */
+static struct backing_dev_info rd_file_backing_dev_info = {
+ .ra_pages = 0, /* No readahead */
+ .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */
+ .unplug_io_fn = default_unplug_io_fn,
};
+/* First open of a given minor: pin the blockdev inode and set up its
+ * pagecache mapping (aops, backing_dev_info, blocksize, GFP mask). */
static int rd_open(struct inode *inode, struct file *filp)
{
unsigned unit = iminor(inode);
- /*
- * Immunize device against invalidate_buffers() and prune_icache().
- */
if (rd_bdev[unit] == NULL) {
struct block_device *bdev = inode->i_bdev;
+ struct address_space *mapping;
+ unsigned bsize;
+ gfp_t gfp_mask;
+
inode = igrab(bdev->bd_inode);
rd_bdev[unit] = bdev;
bdev->bd_openers++;
- bdev->bd_block_size = rd_blocksize;
- inode->i_size = get_capacity(rd_disks[unit])<<9;
- inode->i_mapping->a_ops = &ramdisk_aops;
- inode->i_mapping->backing_dev_info = &rd_backing_dev_info;
+ /* Derive the block size from the queue's hardsect size rather
+ * than reading rd_blocksize directly. */
+ bsize = bdev_hardsect_size(bdev);
+ bdev->bd_block_size = bsize;
+ inode->i_blkbits = blksize_bits(bsize);
+ inode->i_size = get_capacity(bdev->bd_disk)<<9;
+
+ mapping = inode->i_mapping;
+ mapping->a_ops = &ramdisk_aops;
+ mapping->backing_dev_info = &rd_backing_dev_info;
+ bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info;
+
+ /*
+ * Deep badness. rd_blkdev_pagecache_IO() needs to allocate
+ * pagecache pages within a request_fn. We cannot recur back
+ * into the filesystem which is mounted atop the ramdisk, because
+ * that would deadlock on fs locks. And we really don't want
+ * to reenter rd_blkdev_pagecache_IO when we're already within
+ * that function.
+ *
+ * So we turn off __GFP_FS and __GFP_IO.
+ *
+ * And to give this thing a hope of working, turn on __GFP_HIGH.
+ * Hopefully, there's enough regular memory allocation going on
+ * for the page allocator emergency pools to keep the ramdisk
+ * driver happy.
+ */
+ gfp_mask = mapping_gfp_mask(mapping);
+ gfp_mask &= ~(__GFP_FS|__GFP_IO);
+ gfp_mask |= __GFP_HIGH;
+ mapping_set_gfp_mask(mapping, gfp_mask);
}
return 0;
+/* NOTE(review): the hunks below touch the module cleanup and init paths;
+ * most surrounding context lines are elided by the patch, so only the
+ * NUM_RAMDISKS -> CONFIG_BLK_DEV_RAM_COUNT substitutions and the new
+ * blk_queue_hardsect_size() call are visible. */
{
int i;
- for (i = 0; i < NUM_RAMDISKS; i++) {
+ for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
struct block_device *bdev = rd_bdev[i];
rd_bdev[i] = NULL;
if (bdev) {
rd_blocksize = BLOCK_SIZE;
}
- for (i = 0; i < NUM_RAMDISKS; i++) {
+ for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
rd_disks[i] = alloc_disk(1);
if (!rd_disks[i])
goto out;
devfs_mk_dir("rd");
- for (i = 0; i < NUM_RAMDISKS; i++) {
+ for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
struct gendisk *disk = rd_disks[i];
rd_queue[i] = blk_alloc_queue(GFP_KERNEL);
goto out_queue;
blk_queue_make_request(rd_queue[i], &rd_make_request);
+ /* Advertise rd_blocksize as the hardware sector size so that
+ * rd_open()'s bdev_hardsect_size() picks it up. */
+ blk_queue_hardsect_size(rd_queue[i], rd_blocksize);
/* rd_size is given in kB */
disk->major = RAMDISK_MAJOR;
/* rd_size is given in kB */
printk("RAMDISK driver initialized: "
"%d RAM disks of %dK size %d blocksize\n",
- NUM_RAMDISKS, rd_size, rd_blocksize);
+ CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize);
return 0;
out_queue:
#endif
/* options - modular */
+/* module_param() replaces the obsolete MODULE_PARM(); the third argument is
+ * the sysfs permission mask — 0 keeps the parameters out of sysfs. */
-MODULE_PARM (rd_size, "1i");
+module_param(rd_size, int, 0);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
-MODULE_PARM (rd_blocksize, "i");
+module_param(rd_blocksize, int, 0);
MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes.");
+/* Lets udev/modprobe autoload this driver for the ramdisk major number. */
+MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_LICENSE("GPL");