* within the RPC code when root squashing is suspected.
*/
-#include <linux/config.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <asm/system.h>
+#include "internal.h"
#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
-static kmem_cache_t *nfs_rdata_cachep;
+static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
{
- struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+ unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
- if (pagecount < NFS_PAGEVEC_SIZE)
- p->pagevec = &p->page_array[0];
+ p->npages = pagecount;
+ if (pagecount <= ARRAY_SIZE(p->page_array))
+ p->pagevec = p->page_array;
else {
- size_t size = ++pagecount * sizeof(struct page *);
- p->pagevec = kmalloc(size, GFP_NOFS);
- if (p->pagevec) {
- memset(p->pagevec, 0, size);
- } else {
+ p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+ if (!p->pagevec) {
mempool_free(p, nfs_rdata_mempool);
p = NULL;
}
return p;
}
-void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
{
+ struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_rdata_mempool);
}
-void nfs_readdata_release(void *data)
+static void nfs_readdata_free(struct nfs_read_data *rdata)
{
- nfs_readdata_free(data);
+ call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}
-static
-unsigned int nfs_page_length(struct inode *inode, struct page *page)
+void nfs_readdata_release(void *data)
{
- loff_t i_size = i_size_read(inode);
- unsigned long idx;
-
- if (i_size <= 0)
- return 0;
- idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
- if (page->index > idx)
- return 0;
- if (page->index != idx)
- return PAGE_CACHE_SIZE;
- return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+ nfs_readdata_free(data);
}
static
return 0;
}
+static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
+{
+ unsigned int remainder = data->args.count - data->res.count;
+ unsigned int base = data->args.pgbase + data->res.count;
+ unsigned int pglen;
+ struct page **pages;
+
+ if (data->res.eof == 0 || remainder == 0)
+ return;
+ /*
+ * Note: "remainder" can never be negative, since we check for
+ * this in the XDR code.
+ */
+ pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
+ base &= ~PAGE_CACHE_MASK;
+ pglen = PAGE_CACHE_SIZE - base;
+ for (;;) {
+ if (remainder <= pglen) {
+ memclear_highpage_flush(*pages, base, remainder);
+ break;
+ }
+ memclear_highpage_flush(*pages, base, pglen);
+ pages++;
+ remainder -= pglen;
+ pglen = PAGE_CACHE_SIZE;
+ base = 0;
+ }
+}
+
/*
* Read a page synchronously.
*/
{
unsigned int rsize = NFS_SERVER(inode)->rsize;
unsigned int count = PAGE_CACHE_SIZE;
- int result;
+ int result = -ENOMEM;
struct nfs_read_data *rdata;
- rdata = nfs_readdata_alloc(1);
+ rdata = nfs_readdata_alloc(count);
if (!rdata)
- return -ENOMEM;
+ goto out_unlock;
memset(rdata, 0, sizeof(*rdata));
rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
rdata->args.offset = page_offset(page) + rdata->args.pgbase;
dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
- NFS_SERVER(inode)->hostname,
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)rdata->args.pgbase,
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&inode->i_lock);
- if (count)
- memclear_highpage_flush(page, rdata->args.pgbase, count);
- SetPageUptodate(page);
- if (PageError(page))
- ClearPageError(page);
+ if (rdata->res.eof || rdata->res.count == rdata->args.count) {
+ SetPageUptodate(page);
+ if (rdata->res.eof && count != 0)
+ memclear_highpage_flush(page, rdata->args.pgbase, count);
+ }
result = 0;
io_error:
- unlock_page(page);
nfs_readdata_free(rdata);
+out_unlock:
+ unlock_page(page);
return result;
}
struct nfs_page *new;
unsigned int len;
- len = nfs_page_length(inode, page);
+ len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(ctx, inode, page, 0, len);
sigset_t oldset;
rpc_clnt_sigmask(clnt, &oldset);
- lock_kernel();
rpc_execute(&data->task);
- unlock_kernel();
rpc_clnt_sigunmask(clnt, &oldset);
}
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
- unsigned int nbytes, offset;
+ size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+ unsigned int offset;
int requests = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = req->wb_bytes;
- for(;;) {
- data = nfs_readdata_alloc(1);
+ do {
+ size_t len = min(nbytes,rsize);
+
+ data = nfs_readdata_alloc(len);
if (!data)
goto out_bad;
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, &list);
requests++;
- if (nbytes <= rsize)
- break;
- nbytes -= rsize;
- }
+ nbytes -= len;
+ } while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
return nfs_pagein_multi(head, inode);
- data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+ data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
if (!data)
goto out_bad;
return error;
}
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+ int status;
+
+ dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+ task->tk_status);
+
+ status = NFS_PROTO(data->inode)->read_done(task, data);
+ if (status != 0)
+ return status;
+
+ nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+ if (task->tk_status == -ESTALE) {
+ set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+ nfs_mark_for_revalidate(data->inode);
+ }
+ spin_lock(&data->inode->i_lock);
+ NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+ spin_unlock(&data->inode->i_lock);
+ return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+ struct nfs_readargs *argp = &data->args;
+ struct nfs_readres *resp = &data->res;
+
+ if (resp->eof || resp->count == argp->count)
+ return 0;
+
+ /* This is a short read! */
+ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+ /* Has the server at least made some progress? */
+ if (resp->count == 0)
+ return 0;
+
+ /* Yes, so retry the read at the end of the data */
+ argp->offset += resp->count;
+ argp->pgbase += resp->count;
+ argp->count -= resp->count;
+ rpc_restart_call(task);
+ return -EAGAIN;
+}
+
/*
* Handle a read reply that fills part of a page.
*/
if (nfs_readpage_result(task, data) != 0)
return;
- if (task->tk_status >= 0) {
- unsigned int request = data->args.count;
- unsigned int result = data->res.count;
-
- if (result < request) {
- memclear_highpage_flush(page,
- data->args.pgbase + result,
- request - result);
- }
- } else
- SetPageError(page);
+ if (likely(task->tk_status >= 0)) {
+ nfs_readpage_truncate_uninitialised_page(data);
+ if (nfs_readpage_retry(task, data) != 0)
+ return;
+ }
+ if (unlikely(task->tk_status < 0))
+ SetPageError(page);
if (atomic_dec_and_test(&req->wb_complete)) {
if (!PageError(page))
SetPageUptodate(page);
.rpc_release = nfs_readdata_release,
};
+static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
+{
+ unsigned int count = data->res.count;
+ unsigned int base = data->args.pgbase;
+ struct page **pages;
+
+ if (data->res.eof)
+ count = data->args.count;
+ if (unlikely(count == 0))
+ return;
+ pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
+ base &= ~PAGE_CACHE_MASK;
+ count += base;
+ for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
+ SetPageUptodate(*pages);
+ if (count == 0)
+ return;
+ /* Was this a short read? */
+ if (data->res.eof || data->res.count == data->args.count)
+ SetPageUptodate(*pages);
+}
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
- unsigned int count = data->res.count;
if (nfs_readpage_result(task, data) != 0)
return;
+ /*
+ * Note: nfs_readpage_retry may change the values of
+ * data->args. In the multi-page case, we therefore need
+ * to ensure that we call nfs_readpage_set_pages_uptodate()
+ * first.
+ */
+ if (likely(task->tk_status >= 0)) {
+ nfs_readpage_truncate_uninitialised_page(data);
+ nfs_readpage_set_pages_uptodate(data);
+ if (nfs_readpage_retry(task, data) != 0)
+ return;
+ }
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
- struct page *page = req->wb_page;
- nfs_list_remove_request(req);
- if (task->tk_status >= 0) {
- if (count < PAGE_CACHE_SIZE) {
- if (count < req->wb_bytes)
- memclear_highpage_flush(page,
- req->wb_pgbase + count,
- req->wb_bytes - count);
- count = 0;
- } else
- count -= PAGE_CACHE_SIZE;
- SetPageUptodate(page);
- } else
- SetPageError(page);
+ nfs_list_remove_request(req);
nfs_readpage_release(req);
}
}
.rpc_release = nfs_readdata_release,
};
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
- struct nfs_readargs *argp = &data->args;
- struct nfs_readres *resp = &data->res;
- int status;
-
- dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
- task->tk_pid, task->tk_status);
-
- status = NFS_PROTO(data->inode)->read_done(task, data);
- if (status != 0)
- return status;
-
- nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
- /* Is this a short read? */
- if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
- nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
- /* Has the server at least made some progress? */
- if (resp->count != 0) {
- /* Yes, so retry the read at the end of the data */
- argp->offset += resp->count;
- argp->pgbase += resp->count;
- argp->count -= resp->count;
- rpc_restart_call(task);
- return -EAGAIN;
- }
- task->tk_status = -EIO;
- }
- spin_lock(&data->inode->i_lock);
- NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
- spin_unlock(&data->inode->i_lock);
- return 0;
-}
-
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
if (error)
goto out_error;
+ error = -ESTALE;
+ if (NFS_STALE(inode))
+ goto out_error;
+
if (file == NULL) {
+ error = -EBADF;
ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (ctx == NULL)
- return -EBADF;
+ goto out_error;
} else
ctx = get_nfs_open_context((struct nfs_open_context *)
file->private_data);
unsigned int len;
nfs_wb_page(inode, page);
- len = nfs_page_length(inode, page);
+ len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
new = nfs_create_request(desc->ctx, inode, page, 0, len);
};
struct inode *inode = mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
- int ret;
+ int ret = -ESTALE;
dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
inode->i_sb->s_id,
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ if (NFS_STALE(inode))
+ goto out;
+
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (desc.ctx == NULL)
ret = err;
}
put_nfs_open_context(desc.ctx);
+out:
return ret;
}
-int nfs_init_readpagecache(void)
+int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
sizeof(struct nfs_read_data),
void nfs_destroy_readpagecache(void)
{
mempool_destroy(nfs_rdata_mempool);
- if (kmem_cache_destroy(nfs_rdata_cachep))
- printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+ kmem_cache_destroy(nfs_rdata_cachep);
}