X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fnfs%2Fread.c;h=a9c26521a9e2d8a13965e364dae9ac8392cecdb1;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=05eb43fadf8e4296d3e6d41ed4a161c40e9e8efb;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 05eb43fad..a9c26521a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,7 +15,6 @@ * within the RPC code when root squashing is suspected. */ -#include #include #include #include @@ -31,36 +30,58 @@ #include +#include "internal.h" +#include "iostat.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); -static void nfs_readpage_result_partial(struct nfs_read_data *, int); -static void nfs_readpage_result_full(struct nfs_read_data *, int); +static const struct rpc_call_ops nfs_read_partial_ops; +static const struct rpc_call_ops nfs_read_full_ops; -static kmem_cache_t *nfs_rdata_cachep; -mempool_t *nfs_rdata_mempool; +static struct kmem_cache *nfs_rdata_cachep; +static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -void nfs_readdata_release(void *data) +struct nfs_read_data *nfs_readdata_alloc(size_t len) { - nfs_readdata_free(data); + unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); + + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; + else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } + return p; } -static -unsigned int nfs_page_length(struct inode *inode, struct page *page) +static void nfs_readdata_rcu_free(struct rcu_head *head) { - loff_t i_size = i_size_read(inode); - unsigned long idx; + struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu); + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); + mempool_free(p, nfs_rdata_mempool); +} - if (i_size <= 0) - return 0; - idx = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index > idx) - return 0; - if (page->index != idx) - return PAGE_CACHE_SIZE; - return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)); +static void nfs_readdata_free(struct nfs_read_data *rdata) +{ + call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free); +} + +void nfs_readdata_release(void *data) +{ + nfs_readdata_free(data); } static @@ -72,6 +93,35 @@ int nfs_return_empty_page(struct page *page) return 0; } +static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) +{ + unsigned int remainder = data->args.count - data->res.count; + unsigned int base = data->args.pgbase + data->res.count; + unsigned int pglen; + struct page **pages; + + if (data->res.eof == 0 || remainder == 0) + return; + /* + * Note: "remainder" can never be negative, since we check for + * this in the XDR code. + */ + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + pglen = PAGE_CACHE_SIZE - base; + for (;;) { + if (remainder <= pglen) { + memclear_highpage_flush(*pages, base, remainder); + break; + } + memclear_highpage_flush(*pages, base, pglen); + pages++; + remainder -= pglen; + pglen = PAGE_CACHE_SIZE; + base = 0; + } +} + /* * Read a page synchronously. */ @@ -80,12 +130,12 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, { unsigned int rsize = NFS_SERVER(inode)->rsize; unsigned int count = PAGE_CACHE_SIZE; - int result; + int result = -ENOMEM; struct nfs_read_data *rdata; - rdata = nfs_readdata_alloc(1); + rdata = nfs_readdata_alloc(count); if (!rdata) - return -ENOMEM; + goto out_unlock; memset(rdata, 0, sizeof(*rdata)); rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); @@ -112,7 +162,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), (unsigned long long)rdata->args.pgbase, @@ -133,6 +183,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, } count -= result; rdata->args.pgbase += result; + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result); + /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. */ @@ -143,16 +195,17 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); - if (count) - memclear_highpage_flush(page, rdata->args.pgbase, count); - SetPageUptodate(page); - if (PageError(page)) - ClearPageError(page); + if (rdata->res.eof || rdata->res.count == rdata->args.count) { + SetPageUptodate(page); + if (rdata->res.eof && count != 0) + memclear_highpage_flush(page, rdata->args.pgbase, count); + } result = 0; io_error: - unlock_page(page); nfs_readdata_free(rdata); +out_unlock: + unlock_page(page); return result; } @@ -163,7 +216,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *new; unsigned int len; - len = nfs_page_length(inode, page); + len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); new = nfs_create_request(ctx, inode, page, 0, len); @@ -196,9 +249,11 @@ static void nfs_readpage_release(struct nfs_page *req) * Set up the NFS read request struct */ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { struct inode *inode; + int flags; data->req = req; data->inode = inode = req->wb_context->dentry->d_inode; @@ -216,6 +271,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.eof = 0; nfs_fattr_init(&data->fattr); + /* Set up the initial task struct. */ + flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); + rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; @@ -250,9 +308,7 @@ static void nfs_execute_read(struct nfs_read_data *data) sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); - lock_kernel(); rpc_execute(&data->task); - unlock_kernel(); rpc_clnt_sigunmask(clnt, &oldset); } @@ -274,25 +330,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; struct nfs_read_data *data; - unsigned int rsize = NFS_SERVER(inode)->rsize; - unsigned int nbytes, offset; + size_t rsize = NFS_SERVER(inode)->rsize, nbytes; + unsigned int offset; int requests = 0; LIST_HEAD(list); nfs_list_remove_request(req); nbytes = req->wb_bytes; - for(;;) { - data = nfs_readdata_alloc(1); + do { + size_t len = min(nbytes,rsize); + + data = nfs_readdata_alloc(len); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); list_add(&data->pages, &list); requests++; - if (nbytes <= rsize) - break; - nbytes -= rsize; - } + nbytes -= len; + } while(nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); @@ -303,14 +359,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) list_del_init(&data->pages); data->pagevec[0] = page; - data->complete = nfs_readpage_result_partial; if (nbytes > rsize) { - nfs_read_rpcsetup(req, data, rsize, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + rsize, offset); offset += rsize; nbytes -= rsize; } else { - nfs_read_rpcsetup(req, data, nbytes, offset); + nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + nbytes, offset); nbytes = 0; } nfs_execute_read(data); @@ -339,7 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize); if (!data) goto out_bad; @@ -356,8 +413,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) } req = nfs_list_entry(data->pages.next); - data->complete = nfs_readpage_result_full; - nfs_read_rpcsetup(req, data, count, 0); + nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); nfs_execute_read(data); return 0; @@ -388,26 +444,74 @@ nfs_pagein_list(struct list_head *head, int rpages) return error; } +/* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +{ + int status; + + dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid, + task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; + + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); + + if (task->tk_status == -ESTALE) { + set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); + nfs_mark_for_revalidate(data->inode); + } + spin_lock(&data->inode->i_lock); + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); + return 0; +} + +static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +{ + struct nfs_readargs *argp = &data->args; + struct nfs_readres *resp = &data->res; + + if (resp->eof || resp->count == argp->count) + return 0; + + /* This is a short read! */ + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + /* Has the server at least made some progress? */ + if (resp->count == 0) + return 0; + + /* Yes, so retry the read at the end of the data */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + rpc_restart_call(task); + return -EAGAIN; +} + /* * Handle a read reply that fills part of a page. */ -static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) { + struct nfs_read_data *data = calldata; struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (status >= 0) { - unsigned int request = data->args.count; - unsigned int result = data->res.count; - - if (result < request) { - memclear_highpage_flush(page, - data->args.pgbase + result, - request - result); - } - } else - SetPageError(page); + if (nfs_readpage_result(task, data) != 0) + return; + if (likely(task->tk_status >= 0)) { + nfs_readpage_truncate_uninitialised_page(data); + if (nfs_readpage_retry(task, data) != 0) + return; + } + if (unlikely(task->tk_status < 0)) + SetPageError(page); if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); @@ -415,68 +519,68 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) } } -/* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -static void nfs_readpage_result_full(struct nfs_read_data *data, int status) +static const struct rpc_call_ops nfs_read_partial_ops = { + .rpc_call_done = nfs_readpage_result_partial, + .rpc_release = nfs_readdata_release, +}; + +static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) { unsigned int count = data->res.count; - - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); - struct page *page = req->wb_page; - nfs_list_remove_request(req); - - if (status >= 0) { - if (count < PAGE_CACHE_SIZE) { - if (count < req->wb_bytes) - memclear_highpage_flush(page, - req->wb_pgbase + count, - req->wb_bytes - count); - count = 0; - } else - count -= PAGE_CACHE_SIZE; - SetPageUptodate(page); - } else - SetPageError(page); - nfs_readpage_release(req); - } + unsigned int base = data->args.pgbase; + struct page **pages; + + if (data->res.eof) + count = data->args.count; + if (unlikely(count == 0)) + return; + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageUptodate(*pages); + if (count == 0) + return; + /* Was this a short read? */ + if (data->res.eof || data->res.count == data->args.count) + SetPageUptodate(*pages); } /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void nfs_readpage_result(struct rpc_task *task, void *calldata) +static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_readargs *argp = &data->args; - struct nfs_readres *resp = &data->res; - int status = task->tk_status; - - dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, status); - - /* Is this a short read? */ - if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { - /* Has the server at least made some progress? */ - if (resp->count != 0) { - /* Yes, so retry the read at the end of the data */ - argp->offset += resp->count; - argp->pgbase += resp->count; - argp->count -= resp->count; - rpc_restart_call(task); + + if (nfs_readpage_result(task, data) != 0) + return; + /* + * Note: nfs_readpage_retry may change the values of + * data->args. In the multi-page case, we therefore need + * to ensure that we call nfs_readpage_set_pages_uptodate() + * first. + */ + if (likely(task->tk_status >= 0)) { + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_set_pages_uptodate(data); + if (nfs_readpage_retry(task, data) != 0) return; - } - task->tk_status = -EIO; } - spin_lock(&data->inode->i_lock); - NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&data->inode->i_lock); - data->complete(data, status); + while (!list_empty(&data->pages)) { + struct nfs_page *req = nfs_list_entry(data->pages.next); + + nfs_list_remove_request(req); + nfs_readpage_release(req); + } } +static const struct rpc_call_ops nfs_read_full_ops = { + .rpc_call_done = nfs_readpage_result_full, + .rpc_release = nfs_readdata_release, +}; + /* * Read a page over NFS. * We read the page synchronously in the following case: @@ -491,6 +595,9 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page->index); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + nfs_add_stats(inode, NFSIOS_READPAGES, 1); + /* * Try to flush any pending writes to the file.. * @@ -502,10 +609,15 @@ int nfs_readpage(struct file *file, struct page *page) if (error) goto out_error; + error = -ESTALE; + if (NFS_STALE(inode)) + goto out_error; + if (file == NULL) { + error = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) - return -EBADF; + goto out_error; } else ctx = get_nfs_open_context((struct nfs_open_context *) file->private_data); @@ -540,7 +652,7 @@ readpage_async_filler(void *data, struct page *page) unsigned int len; nfs_wb_page(inode, page); - len = nfs_page_length(inode, page); + len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); new = nfs_create_request(desc->ctx, inode, page, 0, len); @@ -564,12 +676,16 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, }; struct inode *inode = mapping->host; struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = -ESTALE; dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), nr_pages); + nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + + if (NFS_STALE(inode)) + goto out; if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); @@ -582,13 +698,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) + nfs_add_stats(inode, NFSIOS_READPAGES, err); ret = err; } put_nfs_open_context(desc.ctx); +out: return ret; } -int nfs_init_readpagecache(void) +int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", sizeof(struct nfs_read_data), @@ -597,10 +715,8 @@ int nfs_init_readpagecache(void) if (nfs_rdata_cachep == NULL) return -ENOMEM; - nfs_rdata_mempool = mempool_create(MIN_POOL_READ, - mempool_alloc_slab, - mempool_free_slab, - nfs_rdata_cachep); + nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, + nfs_rdata_cachep); if (nfs_rdata_mempool == NULL) return -ENOMEM; @@ -610,6 +726,5 @@ int nfs_init_readpagecache(void) void nfs_destroy_readpagecache(void) { mempool_destroy(nfs_rdata_mempool); - if (kmem_cache_destroy(nfs_rdata_cachep)) - printk(KERN_INFO "nfs_read_data: not all structures were freed\n"); + kmem_cache_destroy(nfs_rdata_cachep); }