Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / fs / nfs / read.c
index 053d546..624ca71 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/mempool.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
 static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ  (32)
 
-static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 {
-       struct nfs_read_data   *p;
-       p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+       struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
        if (p) {
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
+               if (pagecount < NFS_PAGEVEC_SIZE)
+                       p->pagevec = &p->page_array[0];
+               else {
+                       size_t size = ++pagecount * sizeof(struct page *);
+                       p->pagevec = kmalloc(size, GFP_NOFS);
+                       if (p->pagevec) {
+                               memset(p->pagevec, 0, size);
+                       } else {
+                               mempool_free(p, nfs_rdata_mempool);
+                               p = NULL;
+                       }
+               }
        }
        return p;
 }
 
-static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readdata_free(struct nfs_read_data *p)
 {
+       if (p && (p->pagevec != &p->page_array[0]))
+               kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_release(struct rpc_task *task)
+void nfs_readdata_release(void *data)
 {
-        struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
         nfs_readdata_free(data);
 }
 
@@ -93,27 +107,35 @@ int nfs_return_empty_page(struct page *page)
 /*
  * Read a page synchronously.
  */
-static int
-nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
+static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
+               struct page *page)
 {
        unsigned int    rsize = NFS_SERVER(inode)->rsize;
        unsigned int    count = PAGE_CACHE_SIZE;
        int             result;
-       struct nfs_read_data    rdata = {
-               .flags          = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0),
-               .cred           = NULL,
-               .inode          = inode,
-               .args           = {
-                       .fh             = NFS_FH(inode),
-                       .lockowner      = current->files,
-                       .pages          = &page,
-                       .pgbase         = 0UL,
-                       .count          = rsize,
-               },
-               .res            = {
-                       .fattr          = &rdata.fattr,
-               }
-       };
+       struct nfs_read_data *rdata;
+
+       rdata = nfs_readdata_alloc(1);
+       if (!rdata) {
+               /* Every exit must leave the page unlocked, as io_error does. */
+               unlock_page(page);
+               return -ENOMEM;
+       }
+
+       /*
+        * nfs_readdata_alloc() returns a zeroed structure with ->pages and
+        * ->pagevec already set up; do not memset it again, that would
+        * clobber ->pagevec before nfs_readdata_free() inspects it.
+        */
+       rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+       rdata->cred = ctx->cred;
+       rdata->inode = inode;
+       rdata->args.fh = NFS_FH(inode);
+       rdata->args.context = ctx;
+       rdata->args.pages = &page;
+       rdata->args.pgbase = 0UL;
+       rdata->args.count = rsize;
+       rdata->res.fattr = &rdata->fattr;
 
        dprintk("NFS: nfs_readpage_sync(%p)\n", page);
 
@@ -123,19 +139,19 @@ nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
         */
        do {
                if (count < rsize)
-                       rdata.args.count = count;
-               rdata.res.count = rdata.args.count;
-               rdata.args.offset = page_offset(page) + rdata.args.pgbase;
+                       rdata->args.count = count;
+               rdata->res.count = rdata->args.count;
+               rdata->args.offset = page_offset(page) + rdata->args.pgbase;
 
                dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
                        NFS_SERVER(inode)->hostname,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
-                       (unsigned long long)rdata.args.pgbase,
-                       rdata.args.count);
+                       (unsigned long long)rdata->args.pgbase,
+                       rdata->args.count);
 
                lock_kernel();
-               result = NFS_PROTO(inode)->read(&rdata, file);
+               result = NFS_PROTO(inode)->read(rdata);
                unlock_kernel();
 
                /*
@@ -148,17 +164,21 @@ nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
                        goto io_error;
                }
                count -= result;
-               rdata.args.pgbase += result;
+               rdata->args.pgbase += result;
+               nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
                /* Note: result == 0 should only happen if we're caching
                 * a write that extends the file and punches a hole.
                 */
-               if (rdata.res.eof != 0 || result == 0)
+               if (rdata->res.eof != 0 || result == 0)
                        break;
        } while (count);
-       NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+       spin_lock(&inode->i_lock);
+       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&inode->i_lock);
 
        if (count)
-               memclear_highpage_flush(page, rdata.args.pgbase, count);
+               memclear_highpage_flush(page, rdata->args.pgbase, count);
        SetPageUptodate(page);
        if (PageError(page))
                ClearPageError(page);
@@ -166,11 +186,12 @@ nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
 
 io_error:
        unlock_page(page);
+       nfs_readdata_free(rdata);
        return result;
 }
 
-static int
-nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
+static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
+               struct page *page)
 {
        LIST_HEAD(one_request);
        struct nfs_page *new;
@@ -179,7 +200,7 @@ nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
-       new = nfs_create_request(file, inode, page, 0, len);
+       new = nfs_create_request(ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
@@ -187,7 +208,6 @@ nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 
-       nfs_lock_request(new);
        nfs_list_add_request(new, &one_request);
        nfs_pagein_one(&one_request, inode);
        return 0;
@@ -197,47 +217,47 @@ static void nfs_readpage_release(struct nfs_page *req)
 {
        unlock_page(req->wb_page);
 
-       nfs_clear_request(req);
-       nfs_release_request(req);
-       nfs_unlock_request(req);
-
        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
-                       req->wb_inode->i_sb->s_id,
-                       (long long)NFS_FILEID(req->wb_inode),
+                       req->wb_context->dentry->d_inode->i_sb->s_id,
+                       (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
+       nfs_clear_request(req);
+       nfs_release_request(req);
 }
 
 /*
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset)
 {
        struct inode            *inode;
+       int flags;
 
        data->req         = req;
-       data->inode       = inode = req->wb_inode;
-       data->cred        = req->wb_cred;
+       data->inode       = inode = req->wb_context->dentry->d_inode;
+       data->cred        = req->wb_context->cred;
 
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
        data->args.pages  = data->pagevec;
        data->args.count  = count;
-       data->args.lockowner = req->wb_lockowner;
-       data->args.state  = req->wb_state;
+       data->args.context = req->wb_context;
 
        data->res.fattr   = &data->fattr;
        data->res.count   = count;
        data->res.eof     = 0;
+       nfs_fattr_init(&data->fattr);
 
+       /* Set up the initial task struct. */
+       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+       rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
        NFS_PROTO(inode)->read_setup(data);
 
        data->task.tk_cookie = (unsigned long)inode;
-       data->task.tk_calldata = data;
-       /* Release requests */
-       data->task.tk_release = nfs_readdata_release;
 
        dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
@@ -302,9 +322,10 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 
        nbytes = req->wb_bytes;
        for(;;) {
-               data = nfs_readdata_alloc();
+               data = nfs_readdata_alloc(1);
                if (!data)
                        goto out_bad;
+               INIT_LIST_HEAD(&data->pages);
                list_add(&data->pages, &list);
                requests++;
                if (nbytes <= rsize)
@@ -321,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
                list_del_init(&data->pages);
 
                data->pagevec[0] = page;
-               data->complete = nfs_readpage_result_partial;
 
                if (nbytes > rsize) {
-                       nfs_read_rpcsetup(req, data, rsize, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       rsize, offset);
                        offset += rsize;
                        nbytes -= rsize;
                } else {
-                       nfs_read_rpcsetup(req, data, nbytes, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       nbytes, offset);
                        nbytes = 0;
                }
                nfs_execute_read(data);
@@ -357,10 +379,11 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
        if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                return nfs_pagein_multi(head, inode);
 
-       data = nfs_readdata_alloc();
+       data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
        if (!data)
                goto out_bad;
 
+       INIT_LIST_HEAD(&data->pages);
        pages = data->pagevec;
        count = 0;
        while (!list_empty(head)) {
@@ -373,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
        }
        req = nfs_list_entry(data->pages.next);
 
-       data->complete = nfs_readpage_result_full;
-       nfs_read_rpcsetup(req, data, count, 0);
+       nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
        nfs_execute_read(data);
        return 0;
@@ -383,7 +405,7 @@ out_bad:
        return -ENOMEM;
 }
 
-int
+static int
 nfs_pagein_list(struct list_head *head, int rpages)
 {
        LIST_HEAD(one_request);
@@ -394,7 +416,7 @@ nfs_pagein_list(struct list_head *head, int rpages)
        while (!list_empty(head)) {
                pages += nfs_coalesce_requests(head, &one_request, rpages);
                req = nfs_list_entry(one_request.next);
-               error = nfs_pagein_one(&one_request, req->wb_inode);
+               error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
                if (error < 0)
                        break;
        }
@@ -408,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
  
-       if (status >= 0) {
+       if (nfs_readpage_result(task, data) != 0)
+               return;
+       if (task->tk_status >= 0) {
                unsigned int request = data->args.count;
                unsigned int result = data->res.count;
 
@@ -432,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
        }
 }
 
+static const struct rpc_call_ops nfs_read_partial_ops = {
+       .rpc_call_done = nfs_readpage_result_partial,
+       .rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        unsigned int count = data->res.count;
 
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
                struct page *page = req->wb_page;
                nfs_list_remove_request(req);
 
-               if (status >= 0) {
+               if (task->tk_status >= 0) {
                        if (count < PAGE_CACHE_SIZE) {
                                if (count < req->wb_bytes)
                                        memclear_highpage_flush(page,
@@ -461,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
        }
 }
 
+static const struct rpc_call_ops nfs_read_full_ops = {
+       .rpc_call_done = nfs_readpage_result_full,
+       .rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
-       int status = task->tk_status;
+       int status;
 
        dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-               task->tk_pid, status);
+               task->tk_pid, task->tk_status);
+
+       status = NFS_PROTO(data->inode)->read_done(task, data);
+       if (status != 0)
+               return status;
+
+       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
        /* Is this a short read? */
        if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+               nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Yes, so retry the read at the end of the data */
@@ -484,12 +528,14 @@ void nfs_readpage_result(struct rpc_task *task)
                        argp->pgbase += resp->count;
                        argp->count -= resp->count;
                        rpc_restart_call(task);
-                       return;
+                       return -EAGAIN;
                }
                task->tk_status = -EIO;
        }
-       NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
-       data->complete(data, status);
+       spin_lock(&data->inode->i_lock);
+       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&data->inode->i_lock);
+       return 0;
 }
 
 /*
@@ -498,14 +544,17 @@ void nfs_readpage_result(struct rpc_task *task)
  *  -  The error flag is set for this page. This happens only when a
  *     previous async read operation failed.
  */
-int
-nfs_readpage(struct file *file, struct page *page)
+int nfs_readpage(struct file *file, struct page *page)
 {
+       struct nfs_open_context *ctx;
        struct inode *inode = page->mapping->host;
        int             error;
 
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
        /*
         * Try to flush any pending writes to the file..
         *
@@ -517,25 +566,36 @@ nfs_readpage(struct file *file, struct page *page)
        if (error)
                goto out_error;
 
+       if (file == NULL) {
+               ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+               if (ctx == NULL) {
+                       /* The page is still locked here; release it on failure. */
+                       error = -EBADF;
+                       goto out_error;
+               }
+       } else
+               ctx = get_nfs_open_context((struct nfs_open_context *)
+                               file->private_data);
        if (!IS_SYNC(inode)) {
-               error = nfs_readpage_async(file, inode, page);
+               error = nfs_readpage_async(ctx, inode, page);
                goto out;
        }
 
-       error = nfs_readpage_sync(file, inode, page);
+       error = nfs_readpage_sync(ctx, inode, page);
        if (error < 0 && IS_SWAPFILE(inode))
                printk("Aiee.. nfs swap-in of page failed!\n");
 out:
+       put_nfs_open_context(ctx);
        return error;
 
 out_error:
        unlock_page(page);
-       goto out;
+       return error;
 }
 
 struct nfs_readdesc {
        struct list_head *head;
-       struct file *filp;
+       struct nfs_open_context *ctx;
 };
 
 static int
@@ -550,7 +607,7 @@ readpage_async_filler(void *data, struct page *page)
        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
-       new = nfs_create_request(desc->filp, inode, page, 0, len);
+       new = nfs_create_request(desc->ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                        SetPageError(page);
                        unlock_page(page);
@@ -558,18 +615,15 @@ readpage_async_filler(void *data, struct page *page)
        }
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
-       nfs_lock_request(new);
        nfs_list_add_request(new, desc->head);
        return 0;
 }
 
-int
-nfs_readpages(struct file *filp, struct address_space *mapping,
+int nfs_readpages(struct file *filp, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
 {
        LIST_HEAD(head);
        struct nfs_readdesc desc = {
-               .filp           = filp,
                .head           = &head,
        };
        struct inode *inode = mapping->host;
@@ -580,13 +634,25 @@ nfs_readpages(struct file *filp, struct address_space *mapping,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+       if (filp == NULL) {
+               desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+               if (desc.ctx == NULL)
+                       return -EBADF;
+       } else
+               desc.ctx = get_nfs_open_context((struct nfs_open_context *)
+                               filp->private_data);
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
        if (!list_empty(&head)) {
                int err = nfs_pagein_list(&head, server->rpages);
-               if (!ret)
+               /* Do not let err overwrite an error from read_cache_pages(). */
+               if (!ret) {
+                       nfs_add_stats(inode, NFSIOS_READPAGES, err);
                        ret = err;
+               }
        }
+       put_nfs_open_context(desc.ctx);
        return ret;
 }
 
@@ -599,10 +663,8 @@ int nfs_init_readpagecache(void)
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;
 
-       nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
-                                          mempool_alloc_slab,
-                                          mempool_free_slab,
-                                          nfs_rdata_cachep);
+       nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
+                                                    nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL)
                return -ENOMEM;