X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fnfs%2Fnfs4state.c;h=96e5b82c153b7f688331549de5fb6c6e16b7a65a;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=67b0f489849032cd26874f47ba9662213def6cab;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 67b0f4898..96e5b82c1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -43,28 +43,22 @@ #include #include #include +#include +#include #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); -static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); - void init_nfsv4_state(struct nfs_server *server) { @@ -75,10 +69,8 @@ init_nfsv4_state(struct nfs_server *server) void destroy_nfsv4_state(struct nfs_server *server) { - if (server->mnt_path) { - kfree(server->mnt_path); - server->mnt_path = NULL; - } + kfree(server->mnt_path); + server->mnt_path = NULL; if (server->nfs4_state) { nfs4_put_client(server->nfs4_state); server->nfs4_state = NULL; @@ -99,11 +91,10 @@ nfs4_alloc_client(struct in_addr *addr) if (nfs_callback_up() < 0) return NULL; - if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { nfs_callback_down(); return NULL; } - memset(clp, 0, sizeof(*clp)); memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); @@ -111,12 +102,12 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); INIT_LIST_HEAD(&clp->cl_superblocks); - init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_state = 1 << NFS4CLNT_OK; + clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; return clp; } @@ -133,10 +124,8 @@ nfs4_free_client(struct nfs4_client *clp) kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); - if (clp->cl_cred) - put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); - if (clp->cl_rpcclient) + if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); nfs_callback_down(); @@ -199,17 +188,17 @@ nfs4_put_client(struct nfs4_client *clp) list_del(&clp->cl_servers); spin_unlock(&state_spinlock); BUG_ON(!list_empty(&clp->cl_superblocks)); - wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); nfs4_kill_renewd(clp); nfs4_free_client(clp); } -int nfs4_init_client(struct nfs4_client *clp) +static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) { - int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); + int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, + nfs_callback_tcpport, cred); if (status == 0) - status = nfs4_proc_setclientid_confirm(clp); + status = nfs4_proc_setclientid_confirm(clp, cred); if (status == 0) nfs4_schedule_state_renewal(clp); return status; @@ -236,6 +225,32 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) return sp; } +struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct rpc_cred *cred = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (list_empty(&sp->so_states)) + continue; + cred = get_rpccred(sp->so_cred); + break; + } + return cred; +} + +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + + if (!list_empty(&clp->cl_state_owners)) { + sp = list_entry(clp->cl_state_owners.next, + struct nfs4_state_owner, so_list); + return get_rpccred(sp->so_cred); + } + return NULL; +} + static struct nfs4_state_owner * nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) { @@ -263,19 +278,22 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kmalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; - init_MUTEX(&sp->so_sema); - sp->so_seqid = 0; /* arbitrary */ + spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); return sp; } -static void -nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +void +nfs4_drop_state_owner(struct nfs4_state_owner *sp) { struct nfs4_client *clp = sp->so_client; spin_lock(&clp->cl_lock); @@ -307,8 +325,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct new = NULL; } spin_unlock(&clp->cl_lock); - if (new) - kfree(new); + kfree(new); if (sp != NULL) return sp; put_rpccred(cred); @@ -348,45 +365,30 @@ nfs4_alloc_open_state(void) { struct nfs4_state *state; - state = kmalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->state = 0; - state->nreaders = 0; - state->nwriters = 0; - state->flags = 0; - memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); - init_MUTEX(&state->lock_sema); - rwlock_init(&state->state_lock); + spin_lock_init(&state->state_lock); return state; } -static struct nfs4_state * -__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) +void +nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs4_state *state; - - mode &= (FMODE_READ|FMODE_WRITE); - list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->owner->so_cred != cred) - continue; - if ((mode & FMODE_READ) != 0 && state->nreaders == 0) - continue; - if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) - continue; - if ((state->state & mode) != mode) - continue; - atomic_inc(&state->count); - if (mode & FMODE_READ) - state->nreaders++; + if (state->state == mode) + return; + /* NB! List reordering - see the reclaim code for why. */ + if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { if (mode & FMODE_WRITE) - state->nwriters++; - return state; + list_move(&state->open_states, &state->owner->so_states); + else + list_move_tail(&state->open_states, &state->owner->so_states); } - return NULL; + if (mode == 0) + list_del_init(&state->inode_states); + state->state = mode; } static struct nfs4_state * @@ -397,7 +399,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) list_for_each_entry(state, &nfsi->open_states, inode_states) { /* Is this in the process of being freed? */ - if (state->nreaders == 0 && state->nwriters == 0) + if (state->state == 0) continue; if (state->owner == owner) { atomic_inc(&state->count); @@ -407,17 +409,6 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) return NULL; } -struct nfs4_state * -nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) -{ - struct nfs4_state *state; - - spin_lock(&inode->i_lock); - state = __nfs4_find_state(inode, cred, mode); - spin_unlock(&inode->i_lock); - return state; -} - static void nfs4_free_open_state(struct nfs4_state *state) { @@ -436,19 +427,23 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) if (state) goto out; new = nfs4_alloc_open_state(); + spin_lock(&owner->so_lock); spin_lock(&inode->i_lock); state = __nfs4_find_state_byowner(inode, owner); if (state == NULL && new != NULL) { state = new; - /* Caller *must* be holding owner->so_sem */ - list_add(&state->open_states, &owner->so_states); state->owner = owner; atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = igrab(inode); spin_unlock(&inode->i_lock); + /* Note: The reclaim code dictates that we add stateless + * and read-only stateids to the end of the list */ + list_add_tail(&state->open_states, &owner->so_states); + spin_unlock(&owner->so_lock); } else { spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); if (new) nfs4_free_open_state(new); } @@ -458,64 +453,67 @@ out: /* * Beware! Caller must be holding exactly one - * reference to clp->cl_sem and owner->so_sema! + * reference to clp->cl_sem! */ void nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) + if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) return; + spin_lock(&inode->i_lock); if (!list_empty(&state->inode_states)) list_del(&state->inode_states); - spin_unlock(&inode->i_lock); list_del(&state->open_states); + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); iput(inode); - BUG_ON (state->state != 0); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } /* - * Beware! Caller must be holding no references to clp->cl_sem! - * of owner->so_sema! + * Close the current file. */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_client *clp = owner->so_client; - int newstate; + int oldstate, newstate = 0; atomic_inc(&owner->so_count); - down_read(&clp->cl_sem); - down(&owner->so_sema); /* Protect against nfs4_find_state() */ + spin_lock(&owner->so_lock); spin_lock(&inode->i_lock); - if (mode & FMODE_READ) - state->nreaders--; - if (mode & FMODE_WRITE) - state->nwriters--; - if (state->nwriters == 0 && state->nreaders == 0) - list_del_init(&state->inode_states); - spin_unlock(&inode->i_lock); - newstate = 0; - if (state->state != 0) { - if (state->nreaders) - newstate |= FMODE_READ; - if (state->nwriters) - newstate |= FMODE_WRITE; - if (state->state == newstate) - goto out; - if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) - return; + switch (mode & (FMODE_READ | FMODE_WRITE)) { + case FMODE_READ: + state->n_rdonly--; + break; + case FMODE_WRITE: + state->n_wronly--; + break; + case FMODE_READ|FMODE_WRITE: + state->n_rdwr--; } -out: + oldstate = newstate = state->state; + if (state->n_rdwr == 0) { + if (state->n_rdonly == 0) + newstate &= ~FMODE_READ; + if (state->n_wronly == 0) + newstate &= ~FMODE_WRITE; + } + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + nfs4_state_set_mode_locked(state, newstate); + oldstate = newstate; + } + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); + + if (oldstate != newstate && nfs4_do_close(inode, state) == 0) + return; nfs4_put_open_state(state); - up(&owner->so_sema); nfs4_put_state_owner(owner); - up_read(&clp->cl_sem); } /* @@ -535,40 +533,26 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) -{ - struct nfs4_lock_state *lsp; - read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); - read_unlock(&state->state_lock); - return lsp; -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema */ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; - lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; - lsp->ls_flags = 0; - lsp->ls_seqid = 0; /* arbitrary */ - lsp->ls_id = -1; - memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + lsp->ls_seqid.sequence = &state->owner->so_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; - INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } @@ -576,195 +560,249 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema and clp->cl_sem + * The caller must be holding clp->cl_sem */ -struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { - struct nfs4_lock_state * lsp; + struct nfs4_lock_state *lsp, *new = NULL; - lsp = nfs4_find_lock_state(state, owner); - if (lsp == NULL) - lsp = nfs4_alloc_lock_state(state, owner); + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + new->ls_state = state; + list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + kfree(new); return lsp; } /* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. + * Release reference to lock_state, and free it if we see that + * it is no longer in use */ -void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { - if (test_bit(LK_STATE_IN_USE, &state->flags)) { - struct nfs4_lock_state *lsp; + struct nfs4_state *state; - lsp = nfs4_find_lock_state(state, fl_owner); - if (lsp) { - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); - nfs4_put_lock_state(lsp); - return; - } - } - memcpy(dst, &state->stateid, sizeof(*dst)); + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + kfree(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) +{ + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; + + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} + +static void nfs4_fl_release_lock(struct file_lock *fl) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -/* -* Check to see if the request lock (type FL_UNLK) effects the fl lock. -* -* fl and request must have the same posix owner -* -* return: -* 0 -> fl not effected by request -* 1 -> fl consumed by request -*/ +static struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; -static int -nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) { - if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) - return 1; + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; return 0; } /* - * Post an initialized lock_state on the state->lock_states list. + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. */ -void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { - if (!list_empty(&lsp->ls_locks)) + struct nfs4_lock_state *lsp; + + memcpy(dst, &state->stateid, sizeof(*dst)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) return; - atomic_inc(&lsp->ls_count); - write_lock(&state->state_lock); - list_add(&lsp->ls_locks, &state->lock_states); - set_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); + nfs4_put_lock_state(lsp); } -/* - * to decide to 'reap' lock state: - * 1) search i_flock for file_locks with fl.lock_state = to ls. - * 2) determine if unlock will consume found lock. - * if so, reap - * - * else, don't reap. - * - */ -void -nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) { - struct inode *inode = state->inode; - struct file_lock *fl; + struct rpc_sequence *sequence = counter->sequence; + struct nfs_seqid *new; - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != lsp->ls_owner) - continue; - /* Exit if we find at least one lock which is not consumed */ - if (nfs4_check_unlock(fl,request) == 0) - return; + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + spin_lock(&sequence->lock); + list_add_tail(&new->list, &sequence->list); + spin_unlock(&sequence->lock); } + return new; +} - write_lock(&state->state_lock); - list_del_init(&lsp->ls_locks); - if (list_empty(&state->lock_states)) - clear_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); - nfs4_put_lock_state(lsp); +void nfs_free_seqid(struct nfs_seqid *seqid) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + + spin_lock(&sequence->lock); + list_del(&seqid->list); + spin_unlock(&sequence->lock); + rpc_wake_up(&sequence->wait); + kfree(seqid); } /* - * Release reference to lock_state, and free it if we see that - * it is no longer in use + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() */ -void -nfs4_put_lock_state(struct nfs4_lock_state *lsp) +static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - if (!atomic_dec_and_test(&lsp->ls_count)) - return; - BUG_ON (!list_empty(&lsp->ls_locks)); - kfree(lsp); + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + if (status == -NFS4ERR_BAD_SEQID) { + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); + nfs4_drop_state_owner(sp); + } + return nfs_increment_seqid(status, seqid); } /* -* Called with sp->so_sema and clp->cl_sem held. -* -* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or -* failed with a seqid incrementing error - -* see comments nfs_fs.h:seqid_mutating_error() -*/ -void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + return nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) { - if (status == NFS_OK || seqid_mutating_err(-status)) - sp->so_seqid++; - /* If the server returns BAD_SEQID, unhash state_owner here */ - if (status == -NFS4ERR_BAD_SEQID) - nfs4_unhash_state_owner(sp); + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + if (sequence->list.next == &seqid->list) + goto out; + spin_lock(&sequence->lock); + if (sequence->list.next != &seqid->list) { + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; + } + spin_unlock(&sequence->lock); +out: + return status; } static int reclaimer(void *); -struct reclaimer_args { - struct nfs4_client *clp; - struct completion complete; -}; + +static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +{ + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER); + rpc_wake_up(&clp->cl_rpcwaitq); +} /* * State recovery routine */ -void -nfs4_recover_state(void *data) +static void nfs4_recover_state(struct nfs4_client *clp) { - struct nfs4_client *clp = (struct nfs4_client *)data; - struct reclaimer_args args = { - .clp = clp, - }; - might_sleep(); - - init_completion(&args.complete); + struct task_struct *task; - if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) - goto out_failed_clear; - wait_for_completion(&args.complete); - return; -out_failed_clear: - set_bit(NFS4CLNT_OK, &clp->cl_state); - wake_up_all(&clp->cl_waitq); - rpc_wake_up(&clp->cl_rpcwaitq); + __module_get(THIS_MODULE); + atomic_inc(&clp->cl_count); + task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", + NIPQUAD(clp->cl_addr)); + if (!IS_ERR(task)) + return; + nfs4_clear_recover_bit(clp); + nfs4_put_client(clp); + module_put(THIS_MODULE); } /* * Schedule a state recovery attempt */ -void -nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs4_client *clp) { if (!clp) return; - if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state)) - schedule_work(&clp->cl_recoverd); + if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) + nfs4_recover_state(clp); } -static int nfs4_reclaim_locks(struct nfs4_state *state) +static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) { struct inode *inode = state->inode; struct file_lock *fl; int status = 0; for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) + if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) continue; - status = nfs4_lock_reclaim(state, fl); + status = ops->recover_lock(state, fl); if (status >= 0) continue; switch (status) { @@ -775,7 +813,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state) case -NFS4ERR_NO_GRACE: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: - /* kill_proc(fl->fl_owner, SIGLOST, 1); */ + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ break; case -NFS4ERR_STALE_CLIENTID: goto out_err; @@ -786,20 +824,26 @@ out_err: return status; } -static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) { struct nfs4_state *state; struct nfs4_lock_state *lock; int status = 0; + /* Note: we rely on the sp->so_states list being ordered + * so that we always reclaim open(O_RDWR) and/or open(O_WRITE) + * states first. + * This is needed to ensure that the server won't give us any + * read delegations that we have to return if, say, we are + * recovering after a network partition or a reboot from a + * server that doesn't support a grace period. + */ list_for_each_entry(state, &sp->so_states, open_states) { if (state->state == 0) continue; - status = nfs4_open_reclaim(sp, state); - list_for_each_entry(lock, &state->lock_states, ls_locks) - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + status = ops->recover_open(sp, state); if (status >= 0) { - status = nfs4_reclaim_locks(state); + status = nfs4_reclaim_locks(ops, state); if (status < 0) goto out_err; list_for_each_entry(lock, &state->lock_states, ls_locks) { @@ -813,8 +857,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __FUNCTION__, status); - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: + case -ENOENT: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: /* @@ -826,6 +869,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: case -NFS4ERR_STALE_CLIENTID: goto out_err; } @@ -835,19 +880,38 @@ out_err: return status; } +static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state; + struct nfs4_lock_state *lock; + + /* Reset all sequence ids to zero */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + sp->so_seqid.counter = 0; + sp->so_seqid.flags = 0; + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } + } + spin_unlock(&sp->so_lock); + } +} + static int reclaimer(void *ptr) { - struct reclaimer_args *args = (struct reclaimer_args *)ptr; - struct nfs4_client *clp = args->clp; + struct nfs4_client *clp = ptr; struct nfs4_state_owner *sp; + struct nfs4_state_recovery_ops *ops; + struct rpc_cred *cred; int status = 0; - daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); allow_signal(SIGKILL); - atomic_inc(&clp->cl_count); - complete(&args->complete); - /* Ensure exclusive access to NFSv4 state */ lock_kernel(); down_write(&clp->cl_sem); @@ -855,37 +919,65 @@ static int reclaimer(void *ptr) if (list_empty(&clp->cl_superblocks)) goto out; restart_loop: - status = nfs4_proc_renew(clp); - if (status == 0 || status == -NFS4ERR_CB_PATH_DOWN) - goto out; - status = nfs4_init_client(clp); + ops = &nfs4_network_partition_recovery_ops; + /* Are there any open files on this volume? */ + cred = nfs4_get_renew_cred(clp); + if (cred != NULL) { + /* Yes there are: try to renew the old lease */ + status = nfs4_proc_renew(clp, cred); + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + put_rpccred(cred); + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + } + } else { + /* "reboot" to ensure we clear all state on the server */ + clp->cl_boot_time = CURRENT_TIME; + cred = nfs4_get_setclientid_cred(clp); + } + /* We're going to have to re-establish a clientid */ + nfs4_state_mark_reclaim(clp); + status = -ENOENT; + if (cred != NULL) { + status = nfs4_init_client(clp, cred); + put_rpccred(cred); + } if (status) goto out_error; - /* Mark all delagations for reclaim */ + /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - status = nfs4_reclaim_open_state(sp); + status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { + if (status == -NFS4ERR_NO_GRACE) { + ops = &nfs4_network_partition_recovery_ops; + status = nfs4_reclaim_open_state(ops, sp); + } if (status == -NFS4ERR_STALE_CLIENTID) goto restart_loop; - goto out_error; + if (status == -NFS4ERR_EXPIRED) + goto restart_loop; } } nfs_delegation_reap_unclaimed(clp); out: - set_bit(NFS4CLNT_OK, &clp->cl_state); up_write(&clp->cl_sem); unlock_kernel(); - wake_up_all(&clp->cl_waitq); - rpc_wake_up(&clp->cl_rpcwaitq); if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); + nfs4_clear_recover_bit(clp); nfs4_put_client(clp); + module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", NIPQUAD(clp->cl_addr.s_addr), -status); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; }