vserver 1.9.5.x5

[linux-2.6.git] / fs / nfsd / nfs4state.c
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 73a8944..498f302 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,6 +44,7 @@
  #include <linux/mount.h>
  #include <linux/workqueue.h>
  #include <linux/smp_lock.h>
+#include <linux/kthread.h>
  #include <linux/nfs4.h>
  #include <linux/nfsd/state.h>
  #include <linux/nfsd/xdr4.h>
@@ -51,11 +52,15 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
  /* Globals */
+static time_t lease_time = 90;     /* default lease time */
+static time_t old_lease_time = 90; /* past incarnation lease time */
+static u32 nfs4_reclaim_init = 0;
  time_t boot_time;
  static time_t grace_end = 0;
  static u32 current_clientid = 1;
-static u32 current_ownerid;
-static u32 current_fileid;
+static u32 current_ownerid = 1;
+static u32 current_fileid = 1;
+static u32 current_delegid = 1;
  static u32 nfs4_init;
  stateid_t zerostateid;             /* bits all 0 */
  stateid_t onestateid;              /* bits all 1 */
@@ -72,9 +77,14 @@ u32 free_sowner = 0;
  u32 vfsopen = 0;
  u32 vfsclose = 0;
  u32 alloc_lsowner= 0;
+u32 alloc_delegation= 0;
+u32 free_delegation= 0;
  
  /* forward declarations */
  struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+static void release_delegation(struct nfs4_delegation *dp);
+static void release_stateid_lockowner(struct nfs4_stateid *open_stp);
  
  /* Locking:
   *
@@ -82,7 +92,7 @@ struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
   *     protects clientid_hashtbl[], clientstr_hashtbl[],
   *     unconfstr_hashtbl[], uncofid_hashtbl[].
   */
-static struct semaphore client_sema;
+static DECLARE_MUTEX(client_sema);
  
  void
  nfs4_lock_state(void)
@@ -90,9 +100,6 @@ nfs4_lock_state(void)
         down(&client_sema);
  }
  
-/*
- * nfs4_unlock_state(); called in encode
- */
  void
  nfs4_unlock_state(void)
  {
@@ -117,6 +124,111 @@ static void release_stateowner(struct nfs4_stateowner *sop);
  static void release_stateid(struct nfs4_stateid *stp, int flags);
  static void release_file(struct nfs4_file *fp);
  
+/*
+ * Delegation state
+ */
+
+/* recall_lock protects the del_recall_lru */
+spinlock_t recall_lock;
+static struct list_head del_recall_lru;
+/* Allocate a delegation of the given type and link it to its file and client; returns NULL if kmalloc fails. */
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, struct svc_fh *current_fh, u32 type)
+{
+       struct nfs4_delegation *dp;
+
+       dprintk("NFSD alloc_init_deleg\n");
+       if ((dp = kmalloc(sizeof(struct nfs4_delegation),
+               GFP_KERNEL)) == NULL)
+               return dp;      /* allocation failed: dp is NULL here */
+       INIT_LIST_HEAD(&dp->dl_del_perfile);
+       INIT_LIST_HEAD(&dp->dl_del_perclnt);
+       INIT_LIST_HEAD(&dp->dl_recall_lru);
+       dp->dl_client = clp;
+       dp->dl_file = fp;
+       dp->dl_flock = NULL;
+       dp->dl_stp = NULL;
+       dp->dl_type = type;
+       dp->dl_recall.cbr_dp = NULL;
+       dp->dl_recall.cbr_ident = 0;
+       dp->dl_recall.cbr_trunc = 0;
+       dp->dl_stateid.si_boot = boot_time;
+       dp->dl_stateid.si_stateownerid = current_delegid++;     /* consume the next delegation id */
+       dp->dl_stateid.si_fileid = 0;
+       dp->dl_stateid.si_generation = 0;
+       dp->dl_fhlen = current_fh->fh_handle.fh_size;
+       memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
+                       current_fh->fh_handle.fh_size); /* NOTE(review): assumes dl_fhval can hold fh_size bytes - confirm */
+       dp->dl_time = 0;
+       atomic_set(&dp->dl_state, NFS4_NO_RECALL);
+       atomic_set(&dp->dl_count, 1);   /* reference count starts at one */
+       atomic_set(&dp->dl_recall_cnt, 0);
+       list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);     /* onto the file's delegation list */
+       list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);    /* onto the client's delegation list */
+       alloc_delegation++;
+       return dp;
+}
+
+/*
+ * Unlink dp from its recall LRU list, free it and count the free.
+ * Called with the recall_lock held.
+ */
+static void
+nfs4_free_delegation(struct nfs4_delegation *dp)
+{
+       dprintk("NFSD: nfs4_free_delegation freeing dp %p\n",dp);
+       list_del(&dp->dl_recall_lru);
+       kfree(dp);
+       free_delegation++;      /* statistics counter only; dp itself is gone */
+}
+
+/* release_delegation:
+ *
+ * Remove the associated file_lock first, then remove the delegation.
+ * lease_modify() is called to remove the FS_LEASE file_lock from
+ * the i_flock list, eventually calling nfsd's lock_manager
+ * fl_release_callback.
+ *
+ * call either:
+ *   nfsd_close : if last close, locks_remove_flock calls lease_modify.
+ *                otherwise, recalled state set to NFS4_RECALL_COMPLETE
+ *                so that it will be reaped by the laundromat service.
+ * or
+ *   remove_lease (calls time_out_lease which calls lease_modify).
+ *   and nfs4_free_delegation.
+ *
+ * Called with nfs4_lock_state() held.
+ * Called with the recall_lock held.
+ */
+
+static void
+release_delegation(struct nfs4_delegation *dp)
+{
+       /* delayed nfsd_close */
+       if (dp->dl_stp) {
+               struct file *filp = dp->dl_stp->st_vfs_file;    /* saved before dl_stp is freed */
+
+               dprintk("NFSD: release_delegation CLOSE\n");
+               release_stateid_lockowner(dp->dl_stp);
+               kfree(dp->dl_stp);
+               dp->dl_stp = NULL;
+               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+               nfsd_close(filp);       /* NOTE(review): runs with recall_lock held - confirm nfsd_close() cannot sleep */
+               vfsclose++;
+       } else {
+               dprintk("NFSD: release_delegation remove lease dl_flock %p\n",
+                       dp->dl_flock);
+               remove_lease(dp->dl_flock);
+               list_del_init(&dp->dl_del_perfile);
+               list_del_init(&dp->dl_del_perclnt);
+               /* dl_count > 0 => outstanding recall rpc */
+               dprintk("NFSD: release_delegation free deleg dl_count %d\n",
+                                  atomic_read(&dp->dl_count));
+               if ((atomic_read(&dp->dl_state) == NFS4_REAP_DELEG)
+                    || atomic_dec_and_test(&dp->dl_count))
+                       nfs4_free_delegation(dp);
+       }
+}
  
  /* 
   * SETCLIENTID state 
@@ -131,8 +243,11 @@ static void release_file(struct nfs4_file *fp);
         ((id) & CLIENT_HASH_MASK)
  #define clientstr_hashval(name, namelen) \
         (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
-
-/* conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+/*
+ * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+ * used in reboot/reset lease grace period processing
+ *
+ * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
   * setclientid_confirmed info. 
   *
   * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
@@ -144,6 +259,8 @@ static void release_file(struct nfs4_file *fp);
   * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
   * for last close replay.
   */
+static struct list_head        reclaim_str_hashtbl[CLIENT_HASH_SIZE];
+static int reclaim_str_hashtbl_size;
  static struct list_head        conf_id_hashtbl[CLIENT_HASH_SIZE];
  static struct list_head        conf_str_hashtbl[CLIENT_HASH_SIZE];
  static struct list_head        unconf_str_hashtbl[CLIENT_HASH_SIZE];
@@ -208,12 +325,40 @@ free_client(struct nfs4_client *clp)
         kfree(clp);
  }
  
+void
+put_nfs4_client(struct nfs4_client *clp)
+{
+       if (atomic_dec_and_test(&clp->cl_count))        /* cl_count reached zero: free the client */
+               free_client(clp);
+}
+
  static void
  expire_client(struct nfs4_client *clp)
  {
         struct nfs4_stateowner *sop;
+       struct nfs4_delegation *dp;
+       struct nfs4_callback *cb = &clp->cl_callback;
+       struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+
+       dprintk("NFSD: expire_client cl_count %d\n",
+                           atomic_read(&clp->cl_count));
+
+       /* shutdown rpc client, ending any outstanding recall rpcs */
+       if (atomic_read(&cb->cb_set) == 1 && clnt) {
+               rpc_shutdown_client(clnt);
+               clnt = clp->cl_callback.cb_client = NULL;
+       }
+       spin_lock(&recall_lock);
+       while (!list_empty(&clp->cl_del_perclnt)) {
+               dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+               dprintk("NFSD: expire client. dp %p, dl_state %d, fp %p\n",
+                               dp, atomic_read(&dp->dl_state), dp->dl_flock);
  
-       dprintk("NFSD: expire_client\n");
+               /* force release of delegation. */
+               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+               release_delegation(dp);
+       }
+       spin_unlock(&recall_lock);
         list_del(&clp->cl_idhash);
         list_del(&clp->cl_strhash);
         list_del(&clp->cl_lru);
@@ -221,18 +366,22 @@ expire_client(struct nfs4_client *clp)
                 sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
                 release_stateowner(sop);
         }
-       free_client(clp);
+       put_nfs4_client(clp);
  }
  
  static struct nfs4_client *
  create_client(struct xdr_netobj name) {
         struct nfs4_client *clp;
  
-       if(!(clp = alloc_client(name)))
+       if (!(clp = alloc_client(name)))
                 goto out;
+       atomic_set(&clp->cl_count, 1);
+       atomic_set(&clp->cl_callback.cb_set, 0);
+       clp->cl_callback.cb_parsed = 0;
         INIT_LIST_HEAD(&clp->cl_idhash);
         INIT_LIST_HEAD(&clp->cl_strhash);
         INIT_LIST_HEAD(&clp->cl_perclient);
+       INIT_LIST_HEAD(&clp->cl_del_perclnt);
         INIT_LIST_HEAD(&clp->cl_lru);
  out:
         return clp;
@@ -260,7 +409,7 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
  
  static int
  cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
-       if(!n1 || !n2)
+       if (!n1 || !n2)
                 return 0;
         return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
  }
@@ -339,6 +488,102 @@ move_to_confirmed(struct nfs4_client *clp, unsigned int idhashval)
         renew_client(clp);
  }
  
+/* Helper for parse_ipv4(): read one decimal octet from *addrp, stopping after a '.' or when *lenp bytes are used up.
+ * Advances *addrp and decrements *lenp past what was read; returns 0..255, or -1 if the field is empty, non-numeric or > 255. */
+static int
+parse_octet(unsigned int *lenp, char **addrp)
+{
+       unsigned int len = *lenp;
+       char *p = *addrp;
+       int n = -1;     /* stays -1 until at least one digit has been seen */
+       char c;
+
+       for (;;) {
+               if (!len)
+                       break;
+               len--;
+               c = *p++;
+               if (c == '.')
+                       break;
+               if ((c < '0') || (c > '9')) {
+                       n = -1;
+                       break;
+               }
+               if (n < 0)
+                       n = 0;
+               n = (n * 10) + (c - '0');
+               if (n > 255) {
+                       n = -1;
+                       break;
+               }
+       }
+       *lenp = len;
+       *addrp = p;
+       return n;
+}
+
+/* Parse the setclientid ipv4 callback address "a.b.c.d.p1.p2" (six decimal
+ * octets) into *cbaddrp and *cbportp, most significant octet first.
+ * Returns 1 on success, 0 as soon as any octet is missing or malformed. */
+int
+parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
+{
+       int temp = 0;
+       u32 cbaddr = 0;
+       u16 cbport = 0;
+       u32 addrlen = addr_len;
+       char *addr = addr_val;
+       int i, shift;
+
+       /* ipaddress */
+       shift = 24;
+       for (i = 4; i > 0; i--) {
+               if ((temp = parse_octet(&addrlen, &addr)) < 0)
+                       return 0;
+               cbaddr |= ((u32)temp << shift); /* cast: an int octet >= 128 shifted by 24 overflows */
+               if (shift > 0)
+                       shift -= 8;
+       }
+       *cbaddrp = cbaddr;
+
+       /* port */
+       shift = 8;
+       for (i = 2; i > 0; i--) {
+               if ((temp = parse_octet(&addrlen, &addr)) < 0)
+                       return 0;
+               cbport |= (temp << shift);
+               if (shift > 0)
+                       shift -= 8;
+       }
+       *cbportp = cbport;
+       return 1;
+}
+/* Fill clp->cl_callback from the SETCLIENTID arguments; cb_parsed records whether the callback address was usable. */
+void
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
+{
+       struct nfs4_callback *cb = &clp->cl_callback;
+
+       /* Currently, we only support tcp for the callback channel */
+       if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
+               goto out_err;
+
+       if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
+                        &cb->cb_addr, &cb->cb_port)))
+               goto out_err;
+       cb->cb_prog = se->se_callback_prog;
+       cb->cb_ident = se->se_callback_ident;
+       cb->cb_parsed = 1;
+       return;
+out_err:
+       /* unsupported netid or unparsable address: log it and mark the callback unusable */
+       printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+               "will not receive delegations\n",
+               clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+       cb->cb_parsed = 0;
+       return;
+}
+
  /*
   * RFC 3010 has a complex implmentation description of processing a 
   * SETCLIENTID request consisting of 5 bullets, labeled as 
@@ -450,6 +695,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 copy_cred(&new->cl_cred,&rqstp->rq_cred);
                 gen_clid(new);
                 gen_confirm(new);
+               gen_callback(new, setclid);
                 add_to_unconfirmed(new, strhashval);
         } else if (cmp_verf(&conf->cl_verifier, &clverifier)) {
                 /*
@@ -477,6 +723,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 copy_cred(&new->cl_cred,&rqstp->rq_cred);
                 copy_clid(new, conf);
                 gen_confirm(new);
+               gen_callback(new, setclid);
                 add_to_unconfirmed(new,strhashval);
         } else if (!unconf) {
                 /*
@@ -494,16 +741,15 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 copy_cred(&new->cl_cred,&rqstp->rq_cred);
                 gen_clid(new);
                 gen_confirm(new);
+               gen_callback(new, setclid);
                 add_to_unconfirmed(new, strhashval);
-       } else if (!cmp_clid(&conf->cl_clientid, &unconf->cl_clientid) &&
-             !cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
+       } else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
                 /*      
                  * CASE3:
                  * confirmed found (name, principal match)
                  * confirmed verifier does not match input clverifier
                  *
                  * unconfirmed found (name match)
-                * confirmed->cl_clientid != unconfirmed->cl_clientid and
                  * confirmed->cl_confirm != unconfirmed->cl_confirm
                  *
                  * remove unconfirmed.
@@ -521,6 +767,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 copy_cred(&new->cl_cred,&rqstp->rq_cred);
                 gen_clid(new);
                 gen_confirm(new);
+               gen_callback(new, setclid);
                 add_to_unconfirmed(new, strhashval);
         } else {
                 /* No cases hit !!! */
@@ -531,7 +778,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
         setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
         setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
         memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
-       printk(KERN_INFO "NFSD: this client will not receive delegations\n");
         status = nfs_ok;
  out:
         nfs4_unlock_state();
@@ -556,9 +802,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
         clientid_t * clid = &setclientid_confirm->sc_clientid;
         int status;
  
-       status = nfserr_stale_clientid;
         if (STALE_CLIENTID(clid))
-               goto out;
+               return nfserr_stale_clientid;
         /* 
          * XXX The Duplicate Request Cache (DRC) has been checked (??)
          * We get here on a DRC miss.
@@ -612,6 +857,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
                         status = nfserr_clid_inuse;
                 else {
                         expire_client(conf);
+                       clp = unconf;
                         move_to_confirmed(unconf, idhashval);
                         status = nfs_ok;
                 }
@@ -629,6 +875,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
                 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
                         status = nfserr_clid_inuse;
                 } else {
+                       clp = conf;
                         status = nfs_ok;
                 }
                 goto out;
@@ -643,6 +890,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
                         status = nfserr_clid_inuse;
                 } else {
                         status = nfs_ok;
+                       clp = unconf;
                         move_to_confirmed(unconf, idhashval);
                 }
                 goto out;
@@ -662,7 +910,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
         status = nfserr_inval;
         goto out;
  out:
-       /* XXX if status == nfs_ok, probe callback path */
+       if (!status)
+               nfsd4_probe_callback(clp);
         nfs4_unlock_state();
         return status;
  }
@@ -708,13 +957,14 @@ alloc_init_file(unsigned int hashval, struct inode *ino) {
         if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
                 INIT_LIST_HEAD(&fp->fi_hash);
                 INIT_LIST_HEAD(&fp->fi_perfile);
+               INIT_LIST_HEAD(&fp->fi_del_perfile);
                 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
                 fp->fi_inode = igrab(ino);
                 fp->fi_id = current_fileid++;
                 alloc_file++;
                 return fp;
         }
-       return (struct nfs4_file *)NULL;
+       return NULL;
  }
  
  static void
@@ -727,7 +977,7 @@ release_all_files(void)
                 while (!list_empty(&file_hashtbl[i])) {
                         fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
                         /* this should never be more than once... */
-                       if(!list_empty(&fp->fi_perfile)) {
+                       if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
                                 printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
                         }
                         release_file(fp);
@@ -735,31 +985,32 @@ release_all_files(void)
         }
  }
  
+/* Free the stateowner embedding this kref, plus its owner string (presumably the kref release callback - confirm); should use a slab cache */
+void
+nfs4_free_stateowner(struct kref *kref)
+{
+       struct nfs4_stateowner *sop =
+               container_of(kref, struct nfs4_stateowner, so_ref);
+       kfree(sop->so_owner.data);
+       kfree(sop);
+       free_sowner++;  /* statistics counter only */
+}
+
  static inline struct nfs4_stateowner *
  alloc_stateowner(struct xdr_netobj *owner)
  {
         struct nfs4_stateowner *sop;
  
         if ((sop = kmalloc(sizeof(struct nfs4_stateowner),GFP_KERNEL))) {
-               if((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
+               if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
                         memcpy(sop->so_owner.data, owner->data, owner->len);
                         sop->so_owner.len = owner->len;
+                       kref_init(&sop->so_ref);
                         return sop;
                 } 
                 kfree(sop);
         }
-       return (struct nfs4_stateowner *)NULL;
-}
-
-/* should use a slab cache */
-static void
-free_stateowner(struct nfs4_stateowner *sop) {
-       if(sop) {
-               kfree(sop->so_owner.data);
-               kfree(sop);
-               sop = NULL;
-               free_sowner++;
-       }
+       return NULL;
  }
  
  static struct nfs4_stateowner *
@@ -769,7 +1020,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
         unsigned int idhashval;
  
         if (!(sop = alloc_stateowner(&open->op_owner)))
-               return (struct nfs4_stateowner *)NULL;
+               return NULL;
         idhashval = ownerid_hashval(current_ownerid);
         INIT_LIST_HEAD(&sop->so_idhash);
         INIT_LIST_HEAD(&sop->so_strhash);
@@ -810,7 +1061,7 @@ release_stateid_lockowner(struct nfs4_stateid *open_stp)
  }
  
  static void
-release_stateowner(struct nfs4_stateowner *sop)
+unhash_stateowner(struct nfs4_stateowner *sop)
  {
         struct nfs4_stateid *stp;
  
@@ -818,17 +1069,23 @@ release_stateowner(struct nfs4_stateowner *sop)
         list_del(&sop->so_strhash);
         list_del(&sop->so_perclient);
         list_del(&sop->so_perlockowner);
-       list_del(&sop->so_close_lru);
         del_perclient++;
         while (!list_empty(&sop->so_perfilestate)) {
                 stp = list_entry(sop->so_perfilestate.next, 
                         struct nfs4_stateid, st_perfilestate);
-               if(sop->so_is_open_owner)
+               if (sop->so_is_open_owner)
                         release_stateid(stp, OPEN_STATE);
                 else
                         release_stateid(stp, LOCK_STATE);
         }
-       free_stateowner(sop);
+}
+
+static void
+release_stateowner(struct nfs4_stateowner *sop)
+{
+       unhash_stateowner(sop);
+       list_del(&sop->so_close_lru);
+       nfs4_put_stateowner(sop);
  }
  
  static inline void
@@ -855,21 +1112,34 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfs4_stateow
         __set_bit(open->op_share_deny, &stp->st_deny_bmap);
  }
  
+/*
+* Because nfsd_close() can call locks_remove_flock() which removes leases,
+* delay nfsd_close() for delegations from the nfsd_open() clientid
+* until the delegation is reaped.
+*/
  static void
-release_stateid(struct nfs4_stateid *stp, int flags) {
+release_stateid(struct nfs4_stateid *stp, int flags)
+{
+       struct nfs4_delegation *dp;
+       struct nfs4_file *fp = stp->st_file;
  
         list_del(&stp->st_hash);
         list_del_perfile++;
         list_del(&stp->st_perfile);
         list_del(&stp->st_perfilestate);
-       if((stp->st_vfs_set) && (flags & OPEN_STATE)) {
+       if ((stp->st_vfs_set) && (flags & OPEN_STATE)) {
+               list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+                       if(cmp_clid(&dp->dl_client->cl_clientid,
+                           &stp->st_stateowner->so_client->cl_clientid)) {
+                               dp->dl_stp = stp;
+                               return;
+                       }
+               }
                 release_stateid_lockowner(stp);
-               nfsd_close(&stp->st_vfs_file);
+               nfsd_close(stp->st_vfs_file);
                 vfsclose++;
-               dput(stp->st_vfs_file.f_dentry);
-               mntput(stp->st_vfs_file.f_vfsmnt);
         } else if ((stp->st_vfs_set) && (flags & LOCK_STATE)) {
-               struct file *filp = &stp->st_vfs_file;
+               struct file *filp = stp->st_vfs_file;
  
                 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
         }
@@ -890,13 +1160,10 @@ void
  move_to_close_lru(struct nfs4_stateowner *sop)
  {
         dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
-       /* remove stateowner from all other hash lists except perclient */
-       list_del_init(&sop->so_idhash);
-       list_del_init(&sop->so_strhash);
-       list_del_init(&sop->so_perlockowner);
  
-        list_add_tail(&sop->so_close_lru, &close_lru);
-        sop->so_time = get_seconds();
+       unhash_stateowner(sop);
+       list_add_tail(&sop->so_close_lru, &close_lru);
+       sop->so_time = get_seconds();
  }
  
  void
@@ -916,7 +1183,7 @@ release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp,
         if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
                 move_to_close_lru(sop);
         /* unused nfs4_file's are releseed. XXX slab cache? */
-       if (list_empty(&fp->fi_perfile)) {
+       if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
                 release_file(fp);
         }
  }
@@ -934,7 +1201,7 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nf
         struct nfs4_stateowner *local = NULL;
  
         list_for_each_entry(local, &ownerstr_hashtbl[hashval], so_strhash) {
-               if(!cmp_owner_str(local, &open->op_owner, &open->op_clientid)) 
+               if (!cmp_owner_str(local, &open->op_owner, &open->op_clientid))
                         continue;
                 *op = local;
                 return(1);
@@ -982,7 +1249,7 @@ set_access(unsigned int *access, unsigned long bmap) {
  
         *access = 0;
         for (i = 1; i < 4; i++) {
-               if(test_bit(i, &bmap))
+               if (test_bit(i, &bmap))
                         *access |= i;
         }
  }
@@ -993,7 +1260,7 @@ set_deny(unsigned int *deny, unsigned long bmap) {
  
         *deny = 0;
         for (i = 0; i < 4; i++) {
-               if(test_bit(i, &bmap))
+               if (test_bit(i, &bmap))
                         *deny |= i ;
         }
  }
@@ -1035,30 +1302,110 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
         return nfs_ok;
  }
  
-static inline int
-nfs4_file_upgrade(struct file *filp, unsigned int share_access)
+static inline void
+nfs4_file_downgrade(struct file *filp, unsigned int share_access)
  {
-int status;
-
         if (share_access & NFS4_SHARE_ACCESS_WRITE) {
-               status = get_write_access(filp->f_dentry->d_inode);
-               if (!status)
-                       filp->f_mode = FMODE_WRITE;
-               else
-                       return nfserrno(status);
+               put_write_access(filp->f_dentry->d_inode);
+               filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
         }
-       return nfs_ok;
  }
  
-static inline void
-nfs4_file_downgrade(struct file *filp, unsigned int share_access)
+/*
+ * Recall a delegation: thread function started by nfsd_break_deleg_cb().
+ */
+static int
+do_recall(void *__dp)
  {
-       if (share_access & NFS4_SHARE_ACCESS_WRITE) {
-               put_write_access(filp->f_dentry->d_inode);
-               filp->f_mode = FMODE_READ;
+       struct nfs4_delegation *dp = __dp;
+
+       daemonize("nfsv4-recall");
+
+       atomic_inc(&dp->dl_count);      /* NOTE(review): reference is taken only once this thread runs - confirm dp cannot be freed before then */
+       nfsd4_cb_recall(dp);
+       return 0;
+}
+
+/*
+ * Lease-break callback: queue the delegation behind this lease (fl->fl_owner)
+ * on del_recall_lru, mark it NFS4_RECALL_IN_PROGRESS, and spawn a thread
+ * that performs the recall.  Does nothing if the lease has no delegation.
+ *
+ * Called from break_lease() with lock_kernel() held.
+ */
+static
+void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+       struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+       struct task_struct *t;
+
+       dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
+       if (!dp)
+               return;
+
+       /* schedule delegation for recall */
+       spin_lock(&recall_lock);
+       atomic_set(&dp->dl_state, NFS4_RECALL_IN_PROGRESS);
+       list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+       spin_unlock(&recall_lock);
+
+       /* only place dl_time is set. protected by lock_kernel */
+       dp->dl_time = get_seconds();
+
+       /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
+       fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
+
+       t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
+       if (IS_ERR(t)) {
+               struct nfs4_client *clp = dp->dl_client;
+
+               printk(KERN_INFO "NFSD: Callback thread failed "
+                       "for client (clientid %08x/%08x)\n",
+                       clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
         }
  }
  
+/*
+ * The file_lock is being reaped: mark its delegation's recall complete
+ * and clear the delegation's back pointer to the lock.
+ * Called by locks_free_lock() with lock_kernel() held.
+ */
+static
+void nfsd_release_deleg_cb(struct file_lock *fl)
+{
+       struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+
+       /* validate before the debug print below dereferences dp */
+       if (!(fl->fl_flags & FL_LEASE) || !dp)
+               return;
+       dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d, dl_state %d\n", fl,dp, atomic_read(&dp->dl_count), atomic_read(&dp->dl_state));
+       atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
+       dp->dl_flock = NULL;
+}
+
+/*
+ * Set the delegation file_lock back pointer (dl_flock) to the new lock.
+ *
+ * Called from __setlease() with lock_kernel() held.
+ */
+static
+void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
+{
+       struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;   /* fl itself is not used */
+
+       dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
+       if (!dp)
+               return;
+       dp->dl_flock = new;
+}
+/* Lock-manager hooks for delegation leases: the break, release and copy callbacks defined in this file. */
+struct lock_manager_operations nfsd_lease_mng_ops = {
+        .fl_break = nfsd_break_deleg_cb,
+        .fl_release_private = nfsd_release_deleg_cb,
+        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
+};
+
+
  
  /*
   * nfsd4_process_open1()
@@ -1154,135 +1501,298 @@ out:
                 status = nfserr_reclaim_bad;
         return status;
  }
+
+/*
+ * Return 1 when share access 'share' conflicts with a delegation of type
+ * 'dtype': write access against a read delegation, or read access against
+ * a write delegation.  Return 0 otherwise.
+ */
+static int
+nfs4_deleg_conflict(u32 share, u32 dtype)
+{
+       if (dtype == NFS4_OPEN_DELEGATE_READ &&
+           (share & NFS4_SHARE_ACCESS_WRITE))
+               return 1;
+       if (dtype == NFS4_OPEN_DELEGATE_WRITE &&
+           (share & NFS4_SHARE_ACCESS_READ))
+               return 1;
+       return 0;
+}
+
+#define DONT_DELEGATE  8
+
+/*
+ * nfs4_check_deleg_recall()
+ *
+ * Examine every delegation on fp whose recall is still in progress.
+ * If the requested share access conflicts with such a delegation the
+ * result is nfserr_jukebox; if it does not conflict, *flag is set to
+ * DONT_DELEGATE.  The whole list is always walked.  Returns 0 when no
+ * conflicting recall was found.
+ */
+static int
+nfs4_check_deleg_recall(struct nfs4_file *fp, struct nfsd4_open *op, int *flag)
+{
+       struct nfs4_delegation *deleg;
+       int ret = 0;
+
+       list_for_each_entry(deleg, &fp->fi_del_perfile, dl_del_perfile) {
+               dprintk("NFSD: found delegation %p with dl_state %d\n",
+                                        deleg, atomic_read(&deleg->dl_state));
+               /* only delegations being recalled matter here */
+               if (atomic_read(&deleg->dl_state) != NFS4_RECALL_IN_PROGRESS)
+                       continue;
+               if (nfs4_deleg_conflict(op->op_share_access, deleg->dl_type))
+                       ret = nfserr_jukebox;
+               else
+                       *flag = DONT_DELEGATE;
+       }
+       return ret;
+}
+
+/*
+ * Check an OPEN by open owner 'sop' against the stateids already on fp.
+ *
+ * A stateid that belongs to 'sop' is reported through *stpp.  Stateids of
+ * lock owners are skipped.  Any other stateid is passed to test_share();
+ * if that fails, nfserr_share_denied is returned at once.  Returns 0 when
+ * the whole list has been walked without a conflict.
+ */
+static int
+nfs4_check_open(struct nfs4_file *fp, struct nfs4_stateowner *sop, struct nfsd4_open *open, struct nfs4_stateid **stpp)
+{
+       struct nfs4_stateid *cur;
+
+       list_for_each_entry(cur, &fp->fi_perfile, st_perfile) {
+               if (cur->st_stateowner == sop) {
+                       /* have we seen this open owner */
+                       *stpp = cur;
+               } else if (cur->st_stateowner->so_is_open_owner != 0) {
+                       /* open owner, not a lock owner: check share reservations */
+                       if (!test_share(cur, open))
+                               return nfserr_share_denied;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Allocate a stateid for a new OPEN and open the file behind cur_fh.
+ *
+ * On success the stateid, with st_vfs_file opened and st_vfs_set set, is
+ * returned through *stpp and the result is 0.  Returns nfserr_resource
+ * when the allocation fails, otherwise the nfsd_open() status with
+ * nfserr_dropit mapped to nfserr_jukebox; the stateid is freed on failure.
+ */
+static int
+nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+               struct svc_fh *cur_fh, int flags)
+{
+       struct nfs4_stateid *new_stp;
+       int err;
+
+       new_stp = kmalloc(sizeof(*new_stp), GFP_KERNEL);
+       if (!new_stp)
+               return nfserr_resource;
+
+       err = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &new_stp->st_vfs_file);
+       if (err) {
+               kfree(new_stp);
+               return (err == nfserr_dropit) ? nfserr_jukebox : err;
+       }
+
+       vfsopen++;
+       new_stp->st_vfs_set = 1;
+       *stpp = new_stp;
+       return 0;
+}
+
+/*
+ * Upgrade the existing OPEN held by stp with the share access in 'open'.
+ *
+ * Nothing is changed unless write access is being added.  In that case
+ * write access to the inode is taken, the file is truncated when
+ * open->op_truncate is set, the struct file mode is updated and the
+ * requested access/deny values are recorded in the stateid bitmaps.
+ *
+ * Returns nfs_ok, or an error status if get_write_access() or the
+ * truncate fails (write access is dropped again if the truncate fails).
+ */
+static int
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
+{
+       struct file *filp = stp->st_vfs_file;
+       struct inode *inode = filp->f_dentry->d_inode;
+       unsigned int held, wanted;
+       int err;
+
+       /* requested access bits that are not in what set_access() reports */
+       set_access(&held, stp->st_access_bmap);
+       wanted = open->op_share_access & ~held;
+
+       if (!(wanted & NFS4_SHARE_ACCESS_WRITE))
+               return nfs_ok;
+
+       /* update the struct file */
+       err = get_write_access(inode);
+       if (err)
+               return nfserrno(err);
+
+       if (open->op_truncate) {
+               struct iattr size_attr = {
+                       .ia_valid = ATTR_SIZE,
+                       .ia_size = 0,
+               };
+
+               err = nfsd_setattr(rqstp, cur_fh, &size_attr, 0, (time_t)0);
+               if (err) {
+                       put_write_access(inode);
+                       return err;
+               }
+       }
+
+       /* remember the open */
+       filp->f_mode = (filp->f_mode | FMODE_WRITE) & ~FMODE_READ;
+       set_bit(open->op_share_access, &stp->st_access_bmap);
+       set_bit(open->op_share_deny, &stp->st_deny_bmap);
+       return nfs_ok;
+}
+
+
+/*
+ * Adjust the result of a CLAIM_PREVIOUS open; other claim types are left
+ * alone.  A failed reclaim always reports nfserr_reclaim_bad.  A
+ * successful one confirms the stateowner and decrements its seqid, since
+ * the seqid will be bumped in encode_open.
+ */
+static void
+nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
+{
+       if (open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+               return;
+
+       if (*status) {
+               *status = nfserr_reclaim_bad;
+               return;
+       }
+       open->op_stateowner->so_confirmed = 1;
+       open->op_stateowner->so_seqid--;
+}
+
+/*
+ * Attempt to hand out a delegation.
+ *
+ * On entry *flag is DONT_DELEGATE if a delegation must not be given for
+ * this open.  On return *flag holds the delegation type actually handed
+ * out: NFS4_OPEN_DELEGATE_READ, NFS4_OPEN_DELEGATE_WRITE, or
+ * NFS4_OPEN_DELEGATE_NONE when no delegation was given.  When one is
+ * given, its stateid is copied into open->op_delegate_stateid.
+ *
+ * A delegation is only considered for a CLAIM_NULL open by a confirmed
+ * stateowner whose client has a callback set up.  A read delegation is
+ * chosen when no write access is requested, a write delegation when no
+ * read access is requested.
+ *
+ * There is no error return; failing to delegate does not fail the OPEN.
+ */
+static void
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp, int *flag)
+{
+       struct nfs4_delegation *dp;
+       struct nfs4_stateowner *sop = stp->st_stateowner;
+       struct nfs4_callback *cb = &sop->so_client->cl_callback;
+       struct file_lock fl, *flp = &fl;
+       int status;
+
+       if (*flag == DONT_DELEGATE) {
+               *flag = NFS4_OPEN_DELEGATE_NONE;
+               return;
+       }
+
+       /* set flag */
+       *flag = NFS4_OPEN_DELEGATE_NONE;
+       if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
+            || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
+               return;
+
+       if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+               *flag = NFS4_OPEN_DELEGATE_READ;
+       else if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))
+               *flag = NFS4_OPEN_DELEGATE_WRITE;
+       else
+               /*
+                * Both read and write access requested: no delegation
+                * type was selected.  Stop here rather than allocate a
+                * delegation and take a lease that the client, which is
+                * told NFS4_OPEN_DELEGATE_NONE, would never return.
+                */
+               return;
+
+       if (!(dp = alloc_init_deleg(sop->so_client, stp->st_file, fh, *flag)))
+               return;
+       locks_init_lock(&fl);
+       fl.fl_lmops = &nfsd_lease_mng_ops;
+       fl.fl_flags = FL_LEASE;
+       fl.fl_end = OFFSET_MAX;
+       fl.fl_owner =  (fl_owner_t)dp;
+       fl.fl_file = stp->st_vfs_file;
+       fl.fl_pid = current->tgid;
+
+       if ((status = setlease(stp->st_vfs_file,
+               *flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
+               /* no lease, no delegation: undo alloc_init_deleg() */
+               dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+               list_del(&dp->dl_del_perfile);
+               list_del(&dp->dl_del_perclnt);
+               kfree(dp);
+               free_delegation++;
+               *flag = NFS4_OPEN_DELEGATE_NONE;
+               return;
+       }
+
+       memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
+
+       dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
+                    dp->dl_stateid.si_boot,
+                    dp->dl_stateid.si_stateownerid,
+                    dp->dl_stateid.si_fileid,
+                    dp->dl_stateid.si_generation);
+}
+
  /*
   * called with nfs4_lock_state() held.
   */
  int
  nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
  {
-       struct iattr iattr;
         struct nfs4_stateowner *sop = open->op_stateowner;
         struct nfs4_file *fp = NULL;
-       struct inode *ino;
+       struct inode *ino = current_fh->fh_dentry->d_inode;
         unsigned int fi_hashval;
-       struct nfs4_stateid *stq, *stp = NULL;
-       int status;
-
-       status = nfserr_resource;
-       if (!sop)
-               return status;
-
-       ino = current_fh->fh_dentry->d_inode;
+       struct nfs4_stateid *stp = NULL;
+       int status, delegflag = 0;
  
         status = nfserr_inval;
         if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
                 goto out;
-
+       /*
+        * Lookup file; if found, lookup stateid and check open request,
+        * and check for delegations in the process of being recalled.
+        * If not found, create the nfs4_file struct
+        */
         fi_hashval = file_hashval(ino);
         if (find_file(fi_hashval, ino, &fp)) {
-               /* Search for conflicting share reservations */
-               status = nfserr_share_denied;
-               list_for_each_entry(stq, &fp->fi_perfile, st_perfile) {
-                       if(stq->st_stateowner == sop) {
-                               stp = stq;
-                               continue;
-                       }
-                       /* ignore lock owners */
-                       if (stq->st_stateowner->so_is_open_owner == 0)
-                               continue;
-                       if (!test_share(stq,open))      
-                               goto out;
-               }
+               if ((status = nfs4_check_open(fp, sop, open, &stp)))
+                       goto out;
+               if ((status = nfs4_check_deleg_recall(fp, open, &delegflag)))
+                       goto out;
         } else {
-       /* No nfs4_file found; allocate and init a new one */
                 status = nfserr_resource;
                 if ((fp = alloc_init_file(fi_hashval, ino)) == NULL)
                         goto out;
         }
  
-       if (!stp) {
-               int flags = 0;
-
-               status = nfserr_resource;
-               if ((stp = kmalloc(sizeof(struct nfs4_stateid),
-                                               GFP_KERNEL)) == NULL)
+       /*
+        * OPEN the file, or upgrade an existing OPEN.
+        * If truncate fails, the OPEN fails.
+        */
+       if (stp) {
+               /* Stateid was found, this is an OPEN upgrade */
+               status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
+               if (status)
                         goto out;
-
+       } else {
+               /* Stateid was not found, this is a new OPEN */
+               int flags = 0;
                 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
                         flags = MAY_WRITE;
                 else
                         flags = MAY_READ;
-               if ((status = nfsd_open(rqstp, current_fh,  S_IFREG,
-                                             flags,
-                                             &stp->st_vfs_file)) != 0)
-                       goto out_free;
-
-               vfsopen++;
-               dget(stp->st_vfs_file.f_dentry);
-               mntget(stp->st_vfs_file.f_vfsmnt);
-
-               init_stateid(stp, fp, sop, open);
-               stp->st_vfs_set = 1;
-       } else {
-               /* This is an upgrade of an existing OPEN. 
-                * OR the incoming share with the existing 
-                * nfs4_stateid share */
-               unsigned int share_access;
-
-               set_access(&share_access, stp->st_access_bmap);
-               share_access = ~share_access;
-               share_access &= open->op_share_access;
-
-               /* update the struct file */
-               if ((status = nfs4_file_upgrade(&stp->st_vfs_file, share_access)))
-                       goto out;
-               /* remember the open */
-               set_bit(open->op_share_access, &stp->st_access_bmap);
-               set_bit(open->op_share_deny, &stp->st_deny_bmap);
-               /* bump the stateid */
-               update_stateid(&stp->st_stateid);
-       }
-       dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n\n",
-                   stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
-                   stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
-
-       if (open->op_truncate) {
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = 0;
-               status = nfsd_setattr(rqstp, current_fh, &iattr, 0, (time_t)0);
-               if (status)
+               if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
                         goto out;
+               init_stateid(stp, fp, sop, open);
+               if (open->op_truncate) {
+                       struct iattr iattr = {
+                               .ia_valid = ATTR_SIZE,
+                               .ia_size = 0,
+                       };
+                       status = nfsd_setattr(rqstp, current_fh, &iattr, 0,
+                                       (time_t)0);
+                       if (status) {
+                               release_stateid(stp, OPEN_STATE);
+                               goto out;
+                       }
+               }
         }
         memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
  
-       open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+       /*
+       * Attempt to hand out a delegation. No error return, because the
+       * OPEN succeeds even if we fail.
+       */
+       nfs4_open_delegation(current_fh, open, stp, &delegflag);
+       open->op_delegate_type = delegflag;
+
         status = nfs_ok;
+
+       dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
+                   stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
+                   stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
  out:
+       /* take the opportunity to clean up unused state */
         if (fp && list_empty(&fp->fi_perfile))
                 release_file(fp);
  
-       if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) {
-               if (status)
-                       status = nfserr_reclaim_bad;
-               else {
-               /* successful reclaim. so_seqid is decremented because
-               * it will be bumped in encode_open
-               */
-                       open->op_stateowner->so_confirmed = 1;
-                       open->op_stateowner->so_seqid--;
-               }
-       }
+       /* CLAIM_PREVIOUS has different error returns */
+       nfs4_set_claim_prev(open, &status);
         /*
         * To finish the open response, we just need to set the rflags.
         */
-       open->op_rflags = 0;
+       open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
         if (!open->op_stateowner->so_confirmed)
                 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
  
         return status;
-out_free:
-       kfree(stp);
-       goto out;
  }
  
  static struct work_struct laundromat_work;
@@ -1333,14 +1843,15 @@ nfs4_laundromat(void)
  {
         struct nfs4_client *clp;
         struct nfs4_stateowner *sop;
+       struct nfs4_delegation *dp;
         struct list_head *pos, *next;
         time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
         time_t t, clientid_val = NFSD_LEASE_TIME;
-       time_t u, close_val = NFSD_LEASE_TIME;
+       time_t u, test_val = NFSD_LEASE_TIME;
  
         nfs4_lock_state();
  
-       dprintk("NFSD: laundromat service - starting, examining clients\n");
+       dprintk("NFSD: laundromat service - starting\n");
         list_for_each_safe(pos, next, &client_lru) {
                 clp = list_entry(pos, struct nfs4_client, cl_lru);
                 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1353,17 +1864,36 @@ nfs4_laundromat(void)
                         clp->cl_clientid.cl_id);
                 expire_client(clp);
         }
+       spin_lock(&recall_lock);
+       list_for_each_safe(pos, next, &del_recall_lru) {
+               dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+               if (atomic_read(&dp->dl_state) == NFS4_RECALL_COMPLETE)
+                       goto reap;
+               if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+                       u = dp->dl_time - cutoff;
+                       if (test_val > u)
+                               test_val = u;
+                       break;
+               }
+reap:
+               dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
+                                   dp, dp->dl_flock);
+               release_delegation(dp);
+       }
+       spin_unlock(&recall_lock);
+       test_val = NFSD_LEASE_TIME;
         list_for_each_safe(pos, next, &close_lru) {
                 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
                 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
                         u = sop->so_time - cutoff;
-                       if (close_val > u)
-                               close_val = u;
+                       if (test_val > u)
+                               test_val = u;
                         break;
                 }
                 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
                         sop->so_id);
-               release_stateowner(sop);
+               list_del(&sop->so_close_lru);
+               nfs4_put_stateowner(sop);
         }
         if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
                 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -1391,7 +1921,7 @@ find_openstateowner_id(u32 st_id, int flags) {
         dprintk("NFSD: find_openstateowner_id %d\n", st_id);
         if (flags & CLOSE_STATE) {
                 list_for_each_entry(local, &close_lru, so_close_lru) {
-                       if(local->so_id == st_id)
+                       if (local->so_id == st_id)
                                 return local;
                 }
         }
@@ -1402,7 +1932,7 @@ static inline int
  nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
  {
         return (stp->st_vfs_set == 0 ||
-               fhp->fh_dentry->d_inode != stp->st_vfs_file.f_dentry->d_inode);
+               fhp->fh_dentry->d_inode != stp->st_vfs_file->f_dentry->d_inode);
  }
  
  static int
@@ -1416,22 +1946,63 @@ STALE_STATEID(stateid_t *stateid)
         return 1;
  }
  
+/*
+ * Return 1 if access_bmap has the bit for NFS4_SHARE_ACCESS_READ or
+ * NFS4_SHARE_ACCESS_BOTH set, 0 otherwise.
+ */
+static inline int
+access_permit_read(unsigned long access_bmap)
+{
+       if (test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap))
+               return 1;
+       if (test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap))
+               return 1;
+       return 0;
+}
+
+/*
+ * Return 1 if access_bmap has the bit for NFS4_SHARE_ACCESS_WRITE or
+ * NFS4_SHARE_ACCESS_BOTH set, 0 otherwise.
+ */
+static inline int
+access_permit_write(unsigned long access_bmap)
+{
+       if (test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap))
+               return 1;
+       if (test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap))
+               return 1;
+       return 0;
+}
+
+/*
+ * Check the access asked for in 'flags' against the access bitmap of stp.
+ * Returns nfserr_openmode if WR_STATE is set and the stateid does not
+ * permit writing, or RD_STATE is set and it does not permit reading;
+ * nfs_ok otherwise.
+ */
+static
+int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
+{
+       if ((flags & WR_STATE) && !access_permit_write(stp->st_access_bmap))
+               return nfserr_openmode;
+       if ((flags & RD_STATE) && !access_permit_read(stp->st_access_bmap))
+               return nfserr_openmode;
+       return nfs_ok;
+}
+
+/*
+ * Check the access asked for in 'flags' against the type of delegation dp.
+ *
+ * Returns nfserr_openmode when a write (WR_STATE) is attempted with a
+ * read delegation, nfs_ok otherwise.  A write delegation covers reads as
+ * well as writes, so RD_STATE is never refused here.
+ */
+static int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+       /*
+        * This must be a logical AND.  A bitwise AND of the masked flag
+        * with the 0/1 result of the comparison is zero unless WR_STATE
+        * happens to be bit 0, which would silently disable the check.
+        */
+       if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+               return nfserr_openmode;
+       return nfs_ok;
+}
  
  /*
  * Checks for stateid operations
  */
  int
-nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct nfs4_stateid **stpp)
+nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags)
  {
-       struct nfs4_stateid *stp;
+       struct nfs4_stateid *stp = NULL;
+       struct nfs4_delegation *dp = NULL;
+       stateid_t *stidp;
         int status;
  
         dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
                 stateid->si_boot, stateid->si_stateownerid, 
                 stateid->si_fileid, stateid->si_generation); 
  
-       *stpp = NULL;
-
         /* STALE STATEID */
         status = nfserr_stale_stateid;
         if (STALE_STATEID(stateid)) 
@@ -1439,33 +2010,48 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
  
         /* BAD STATEID */
         status = nfserr_bad_stateid;
-       if (!(stp = find_stateid(stateid, flags))) {
-               dprintk("NFSD: preprocess_stateid_op: no open stateid!\n");
-               goto out;
-       }
-       if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
-               dprintk("NFSD: preprocess_stateid_op: fh-stateid mismatch!\n");
-               stp->st_vfs_set = 0;
-               goto out;
-       }
-       if (!stp->st_stateowner->so_confirmed) {
-               dprintk("preprocess_stateid_op: lockowner not confirmed yet!\n");
-               goto out;
+       if (!stateid->si_fileid) { /* delegation stateid */
+               struct inode *ino = current_fh->fh_dentry->d_inode;
+
+               if(!(dp = find_delegation_stateid(ino, stateid))) {
+                       dprintk("NFSD: delegation stateid not found\n");
+                       goto out;
+               }
+               stidp = &dp->dl_stateid;
+       } else { /* open or lock stateid */
+               if (!(stp = find_stateid(stateid, flags))) {
+                       dprintk("NFSD: open or lock stateid not found\n");
+                       goto out;
+               }
+               if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
+                       goto out;
+               if (!stp->st_stateowner->so_confirmed)
+                       goto out;
+               stidp = &stp->st_stateid;
         }
-       if (stateid->si_generation > stp->st_stateid.si_generation) {
-               dprintk("preprocess_stateid_op: future stateid?!\n");
+       if (stateid->si_generation > stidp->si_generation)
                 goto out;
-       }
  
         /* OLD STATEID */
         status = nfserr_old_stateid;
-       if (stateid->si_generation < stp->st_stateid.si_generation) {
-               dprintk("preprocess_stateid_op: old stateid!\n");
+       if (stateid->si_generation < stidp->si_generation)
                 goto out;
+       if (stp) {
+               if ((status = nfs4_check_openmode(stp,flags)))
+                       goto out;
+               renew_client(stp->st_stateowner->so_client);
+       } else if (dp) {
+               if ((status = nfs4_check_delegmode(dp, flags)))
+                       goto out;
+               renew_client(dp->dl_client);
+               if (flags & DELEG_RET) {
+                       atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
+                       spin_lock(&recall_lock);
+                       release_delegation(dp);
+                       spin_unlock(&recall_lock);
+               }
         }
-       *stpp = stp;
         status = nfs_ok;
-       renew_client(stp->st_stateowner->so_client);
  out:
         return status;
  }
@@ -1512,10 +2098,12 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
  
         status = nfserr_bad_stateid;
  
-       /* for new lock stateowners, check that the lock->v.new.open_stateid
-        * refers to an open stateowner, and that the lockclid
-        * (nfs4_lock->v.new.clientid) is the same as the
-        * open_stateid->st_stateowner->so_client->clientid
+       /* for new lock stateowners:
+        * check that the lock->v.new.open_stateid
+        * refers to an open stateowner
+        *
+        * check that the lockclid (nfs4_lock->v.new.clientid) is the same
+        * as the open_stateid->st_stateowner->so_client->clientid
          */
         if (lockclid) {
                 struct nfs4_stateowner *sop = stp->st_stateowner;
@@ -1601,8 +2189,16 @@ check_replay:
  }
  
  /*
- * nfs4_unlock_state(); called in encode
+ * eventually, this will perform an upcall to the 'state daemon' as well as
+ * set the cl_first_state field.
   */
+void
+first_state(struct nfs4_client *clp)
+{
+       /* cl_first_state is stamped once; later calls leave it alone */
+       if (clp->cl_first_state)
+               return;
+       clp->cl_first_state = get_seconds();
+}
+
  int
  nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
  {
@@ -1617,7 +2213,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
         if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
                 goto out;
  
-       oc->oc_stateowner = NULL;
         nfs4_lock_state();
  
         if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid,
@@ -1637,7 +2232,11 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
                          stp->st_stateid.si_fileid,
                          stp->st_stateid.si_generation);
         status = nfs_ok;
+       first_state(sop->so_client);
  out:
+       if (oc->oc_stateowner)
+               nfs4_get_stateowner(oc->oc_stateowner);
+       nfs4_unlock_state();
         return status;
  }
  
@@ -1666,10 +2265,6 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
         }
  }
  
-/*
- * nfs4_unlock_state(); called in encode
- */
-
  int
  nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
  {
@@ -1681,10 +2276,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
                         (int)current_fh->fh_dentry->d_name.len,
                         current_fh->fh_dentry->d_name.name);
  
-       od->od_stateowner = NULL;
-       status = nfserr_inval;
         if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny))
-               goto out;
+               return nfserr_inval;
  
         nfs4_lock_state();
         if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, 
@@ -1705,7 +2298,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
                 goto out;
         }
         set_access(&share_access, stp->st_access_bmap);
-       nfs4_file_downgrade(&stp->st_vfs_file, 
+       nfs4_file_downgrade(stp->st_vfs_file,
                             share_access & ~od->od_share_access);
  
         reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
@@ -1715,6 +2308,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n
         memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
         status = nfs_ok;
  out:
+       if (od->od_stateowner)
+               nfs4_get_stateowner(od->od_stateowner);
+       nfs4_unlock_state();
         return status;
  }
  
@@ -1731,7 +2327,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
                         (int)current_fh->fh_dentry->d_name.len,
                         current_fh->fh_dentry->d_name.name);
  
-       close->cl_stateowner = NULL;
         nfs4_lock_state();
         /* check close_lru for replay */
         if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, 
@@ -1748,10 +2343,29 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
  
         /* release_state_owner() calls nfsd_close() if needed */
         release_state_owner(stp, &close->cl_stateowner, OPEN_STATE);
+out:
+       if (close->cl_stateowner)
+               nfs4_get_stateowner(close->cl_stateowner);
+       nfs4_unlock_state();
+       return status;
+}
+
+/*
+ * DELEGRETURN operation.
+ *
+ * Verifies that current_fh is a regular file, then, under the state lock,
+ * runs nfs4_preprocess_stateid_op() on dr->dr_stateid with DELEG_RET.
+ * Returns the status of whichever step failed, or of the stateid check.
+ */
+int
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
+{
+       int status;
+
+       if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
+               goto out;
+
+       nfs4_lock_state();
+       status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET);
+       nfs4_unlock_state();
 out:
         return status;
  }
  
+
  /* 
   * Lock owner state (byte-range locks)
   */
@@ -1778,19 +2392,19 @@ find_stateid(stateid_t *stid, int flags)
         unsigned int hashval;
  
         dprintk("NFSD: find_stateid flags 0x%x\n",flags);
-       if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) {
+       if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
                 hashval = stateid_hashval(st_id, f_id);
                 list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
-                       if((local->st_stateid.si_stateownerid == st_id) &&
-                          (local->st_stateid.si_fileid == f_id))
+                       if ((local->st_stateid.si_stateownerid == st_id) &&
+                           (local->st_stateid.si_fileid == f_id))
                                 return local;
                 }
         } 
-       if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) {
+       if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
                 hashval = stateid_hashval(st_id, f_id);
                 list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
-                       if((local->st_stateid.si_stateownerid == st_id) &&
-                          (local->st_stateid.si_fileid == f_id))
+                       if ((local->st_stateid.si_stateownerid == st_id) &&
+                           (local->st_stateid.si_fileid == f_id))
                                 return local;
                 }
         } else
@@ -1798,6 +2412,32 @@ find_stateid(stateid_t *stid, int flags)
         return NULL;
  }
  
+/*
+ * Find the delegation on inode 'ino' whose stateid has the same
+ * si_stateownerid as 'stid'.
+ *
+ * Returns the matching delegation, or NULL when either argument is NULL,
+ * when find_file() finds no nfs4_file for the inode, or when none of the
+ * file's delegations matches.
+ */
+static struct nfs4_delegation *
+find_delegation_stateid(struct inode *ino, stateid_t *stid)
+{
+       struct nfs4_delegation *dp = NULL;
+       struct nfs4_file *fp = NULL;
+       u32 st_id;
+       unsigned int fi_hashval;
+
+       /* Must come before the debug print, which dereferences stid. */
+       if (!ino || !stid)
+               return NULL;
+
+       dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
+                    stid->si_boot, stid->si_stateownerid,
+                    stid->si_fileid, stid->si_generation);
+
+       st_id = stid->si_stateownerid;
+       fi_hashval = file_hashval(ino);
+       if (find_file(fi_hashval, ino, &fp)) {
+               list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+                       if (dp->dl_stateid.si_stateownerid == st_id) {
+                               dprintk("NFSD: find_delegation dp %p\n",dp);
+                               return dp;
+                       }
+               }
+       }
+       return NULL;
+}
  
  /*
   * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -1839,10 +2479,14 @@ static inline void
  nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
  {
         struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner;
+       unsigned int hval = lockownerid_hashval(sop->so_id);
  
         deny->ld_sop = NULL;
-       if (nfs4_verify_lock_stateowner(sop, fl->fl_pid))
+       if (nfs4_verify_lock_stateowner(sop, hval)) {
+               kref_get(&sop->so_ref);
                 deny->ld_sop = sop;
+               deny->ld_clientid = sop->so_client->cl_clientid;
+       }
         deny->ld_start = fl->fl_start;
         deny->ld_length = ~(u64)0;
         if (fl->fl_end != ~(u64)0)
@@ -1852,13 +2496,28 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
                 deny->ld_type = NFS4_WRITE_LT;
  }
  
+/*
+ * Search every bucket of lock_ownerid_hashtbl[] for a stateowner that
+ * cmp_owner_str() matches against 'owner' and 'clid'.  Returns the first
+ * match, or NULL if there is none.
+ */
+static struct nfs4_stateowner *
+find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
+{
+       struct nfs4_stateowner *sop = NULL;
+       int bucket;
+
+       for (bucket = 0; bucket < LOCK_HASH_SIZE; bucket++) {
+               list_for_each_entry(sop, &lock_ownerid_hashtbl[bucket], so_idhash) {
+                       if (cmp_owner_str(sop, owner, clid))
+                               return sop;
+               }
+       }
+       return NULL;
+}
  
  static int
  find_lockstateowner_str(unsigned int hashval, struct xdr_netobj *owner, clientid_t *clid, struct nfs4_stateowner **op) {
         struct nfs4_stateowner *local = NULL;
  
         list_for_each_entry(local, &lock_ownerstr_hashtbl[hashval], so_strhash) {
-               if(!cmp_owner_str(local, owner, clid)) 
+               if (!cmp_owner_str(local, owner, clid))
                         continue;
                 *op = local;
                 return(1);
@@ -1883,7 +2542,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
         unsigned int idhashval;
  
         if (!(sop = alloc_stateowner(&lock->lk_new_owner)))
-               return (struct nfs4_stateowner *)NULL;
+               return NULL;
         idhashval = lockownerid_hashval(current_ownerid);
         INIT_LIST_HEAD(&sop->so_idhash);
         INIT_LIST_HEAD(&sop->so_strhash);
@@ -1933,7 +2592,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
         stp->st_stateid.si_stateownerid = sop->so_id;
         stp->st_stateid.si_fileid = fp->fi_id;
         stp->st_stateid.si_generation = 0;
-       stp->st_vfs_file = open_stp->st_vfs_file;
+       stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
         stp->st_vfs_set = open_stp->st_vfs_set;
         stp->st_access_bmap = open_stp->st_access_bmap;
         stp->st_deny_bmap = open_stp->st_deny_bmap;
@@ -1946,13 +2605,11 @@ int
  check_lock_length(u64 offset, u64 length)
  {
         return ((length == 0)  || ((length != ~(u64)0) &&
-            LOFF_OVERFLOW(offset, length)));
+            LOFF_OVERFLOW(offset, length)));
  }
  
  /*
   *  LOCK operation 
- *
- * nfs4_unlock_state(); called in encode
   */
  int
  nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
@@ -1971,13 +2628,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
  
         if (nfs4_in_grace() && !lock->lk_reclaim)
                 return nfserr_grace;
-       if (nfs4_in_no_grace() && lock->lk_reclaim)
+       if (!nfs4_in_grace() && lock->lk_reclaim)
                 return nfserr_no_grace;
  
         if (check_lock_length(lock->lk_offset, lock->lk_length))
                  return nfserr_inval;
  
-       lock->lk_stateowner = NULL;
         nfs4_lock_state();
  
         if (lock->lk_is_new) {
@@ -1994,7 +2650,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                         printk("NFSD: nfsd4_lock: clientid is stale!\n");
                         goto out;
                 }
-               /* does the clientid in the lock owner own the open stateid? */
+
+               /* is the new lock seqid presented by the client zero? */
+               status = nfserr_bad_seqid;
+               if (lock->v.new.lock_seqid != 0)
+                       goto out;
  
                 /* validate and update open stateid and open seqid */
                 status = nfs4_preprocess_seqid_op(current_fh, 
@@ -2013,22 +2673,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                 strhashval = lock_ownerstr_hashval(fp->fi_inode, 
                                 open_sop->so_client->cl_clientid.cl_id, 
                                 lock->v.new.owner);
-
                 /* 
                  * If we already have this lock owner, the client is in 
                  * error (or our bookeeping is wrong!) 
                  * for asking for a 'new lock'.
                  */
                 status = nfserr_bad_stateid;
-               if (find_lockstateowner_str(strhashval, &lock->v.new.owner,
-                                       &lock->v.new.clientid, &lock_sop))
+               lock_sop = find_lockstateowner(&lock->v.new.owner,
+                                               &lock->v.new.clientid);
+               if (lock_sop)
                         goto out;
                 status = nfserr_resource;
                 if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
                         goto out;
                 if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, 
-                                               fp, open_stp)) == NULL)
+                                               fp, open_stp)) == NULL) {
+                       release_stateowner(lock->lk_stateowner);
+                       lock->lk_stateowner = NULL;
                         goto out;
+               }
                 /* bump the open seqid used to create the lock */
                 open_sop->so_seqid++;
         } else {
@@ -2042,13 +2705,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                         goto out;
         }
         /* lock->lk_stateowner and lock_stp have been created or found */
-       filp = &lock_stp->st_vfs_file;
+       filp = lock_stp->st_vfs_file;
  
         if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
                 printk("NFSD: nfsd4_lock: permission denied!\n");
                 goto out;
         }
  
+       locks_init_lock(&file_lock);
         switch (lock->lk_type) {
                 case NFS4_READ_LT:
                 case NFS4_READW_LT:
@@ -2063,12 +2727,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                 goto out;
         }
         file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner;
-       file_lock.fl_pid = lockownerid_hashval(lock->lk_stateowner->so_id);
+       file_lock.fl_pid = current->tgid;
         file_lock.fl_file = filp;
         file_lock.fl_flags = FL_POSIX;
-       file_lock.fl_notify = NULL;
-       file_lock.fl_insert = NULL;
-       file_lock.fl_remove = NULL;
  
         file_lock.fl_start = lock->lk_offset;
         if ((lock->lk_length == ~(u64)0) || 
@@ -2084,7 +2745,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         */
  
         status = posix_lock_file(filp, &file_lock);
-       dprintk("NFSD: nfsd4_lock: posix_test_lock passed. posix_lock_file status %d\n",status);
+       if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+               file_lock.fl_ops->fl_release_private(&file_lock);
+       dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
         switch (-status) {
         case 0: /* success! */
                 update_stateid(&lock_stp->st_stateid);
@@ -2125,6 +2788,9 @@ out_destroy_new_stateid:
                 release_state_owner(lock_stp, &lock->lk_stateowner, LOCK_STATE);
         }
  out:
+       if (lock->lk_stateowner)
+               nfs4_get_stateowner(lock->lk_stateowner);
+       nfs4_unlock_state();
         return status;
  }
  
@@ -2135,7 +2801,6 @@ int
  nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt)
  {
         struct inode *inode;
-       struct nfs4_stateowner *sop;
         struct file file;
         struct file_lock file_lock;
         struct file_lock *conflicting_lock;
@@ -2165,6 +2830,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         }
  
         inode = current_fh->fh_dentry->d_inode;
+       locks_init_lock(&file_lock);
         switch (lockt->lt_type) {
                 case NFS4_READ_LT:
                 case NFS4_READW_LT:
@@ -2186,14 +2852,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         find_lockstateowner_str(strhashval, &lockt->lt_owner,
                                         &lockt->lt_clientid, 
                                         &lockt->lt_stateowner);
-       sop = lockt->lt_stateowner;
-       if (sop) {
-               file_lock.fl_owner = (fl_owner_t) sop;
-               file_lock.fl_pid = lockownerid_hashval(sop->so_id);
-       } else {
-               file_lock.fl_owner = NULL;
-               file_lock.fl_pid = 0;
-       }
+       if (lockt->lt_stateowner)
+               file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
+       file_lock.fl_pid = current->tgid;
         file_lock.fl_flags = FL_POSIX;
  
         file_lock.fl_start = lockt->lt_offset;
@@ -2238,7 +2899,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         if (check_lock_length(locku->lu_offset, locku->lu_length))
                  return nfserr_inval;
  
-       locku->lu_stateowner = NULL;
         nfs4_lock_state();
                                                                                 
         if ((status = nfs4_preprocess_seqid_op(current_fh, 
@@ -2248,16 +2908,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                                         &locku->lu_stateowner, &stp, NULL)))
                 goto out;
  
-       filp = &stp->st_vfs_file;
+       filp = stp->st_vfs_file;
         BUG_ON(!filp);
+       locks_init_lock(&file_lock);
         file_lock.fl_type = F_UNLCK;
         file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner;
-       file_lock.fl_pid = lockownerid_hashval(locku->lu_stateowner->so_id);
+       file_lock.fl_pid = current->tgid;
         file_lock.fl_file = filp;
         file_lock.fl_flags = FL_POSIX; 
-       file_lock.fl_notify = NULL;
-       file_lock.fl_insert = NULL;
-       file_lock.fl_remove = NULL;
         file_lock.fl_start = locku->lu_offset;
  
         if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -2270,6 +2928,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         *  Try to unlock the file in the VFS.
         */
         status = posix_lock_file(filp, &file_lock); 
+       if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+               file_lock.fl_ops->fl_release_private(&file_lock);
         if (status) {
                 printk("NFSD: nfs4_locku: posix_lock_file failed!\n");
                 goto out_nfserr;
@@ -2281,6 +2941,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
         memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
  
  out:
+       if (locku->lu_stateowner)
+               nfs4_get_stateowner(locku->lu_stateowner);
+       nfs4_unlock_state();
         return status;
  
  out_nfserr:
@@ -2317,7 +2980,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
         clientid_t *clid = &rlockowner->rl_clientid;
         struct nfs4_stateowner *local = NULL;
         struct xdr_netobj *owner = &rlockowner->rl_owner;
-       int status, i;
+       int status;
  
         dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
                 clid->cl_boot, clid->cl_id);
@@ -2332,23 +2995,18 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
  
         nfs4_lock_state();
  
-       /* find the lockowner */
-        status = nfs_ok;
-       for (i=0; i < LOCK_HASH_SIZE; i++) {
-               list_for_each_entry(local, &lock_ownerstr_hashtbl[i], so_strhash) {
-                       if(cmp_owner_str(local, owner, clid))
-                               break;
-               }
-       }
+       status = nfs_ok;
+       local = find_lockstateowner(owner, clid);
         if (local) {
                 struct nfs4_stateid *stp;
  
-               /* check for any locks held by any stateid associated with the
-                * (lock) stateowner */
+               /* check for any locks held by any stateid
+                * associated with the (lock) stateowner */
                 status = nfserr_locks_held;
-               list_for_each_entry(stp, &local->so_perfilestate, st_perfilestate) {
-                       if(stp->st_vfs_set) {
-                               if (check_for_locks(&stp->st_vfs_file, local))
+               list_for_each_entry(stp, &local->so_perfilestate,
+                               st_perfilestate) {
+                       if (stp->st_vfs_set) {
+                               if (check_for_locks(stp->st_vfs_file, local))
                                         goto out;
                         }
                 }
@@ -2361,6 +3019,112 @@ out:
         return status;
  }
  
+/* Allocate a reclaim record and its namelen-byte name buffer, or NULL. */
+static inline struct nfs4_client_reclaim *
+alloc_reclaim(int namelen)
+{
+       struct nfs4_client_reclaim *rec;
+
+       rec = kmalloc(sizeof(*rec), GFP_KERNEL);
+       if (rec == NULL)
+               return NULL;
+       rec->cr_name.data = kmalloc(namelen, GFP_KERNEL);
+       if (rec->cr_name.data != NULL)
+               return rec;
+       kfree(rec);             /* name buffer failed: undo the record */
+       return NULL;
+}
+
+/*
+ * returns 0 on failure => all reset bets are off, nfserr_no_grace...
+ */
+static int
+nfs4_client_to_reclaim(struct nfs4_client *clp)
+{
+       struct nfs4_client_reclaim *rec = alloc_reclaim(clp->cl_name.len);
+       unsigned int bucket;
+
+       if (rec == NULL)
+               return 0;       /* allocation failed */
+       /* copy the client's identity into the record, then link it in */
+       rec->cr_name.len = clp->cl_name.len;
+       memcpy(rec->cr_name.data, clp->cl_name.data, clp->cl_name.len);
+       rec->cr_first_state = clp->cl_first_state;
+       rec->cr_expired = 0;
+       bucket = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+       INIT_LIST_HEAD(&rec->cr_strhash);
+       list_add(&rec->cr_strhash, &reclaim_str_hashtbl[bucket]);
+       return 1;
+}
+
+static void
+nfs4_release_reclaim(void)
+{
+       struct nfs4_client_reclaim *rec;
+       struct list_head *bucket = reclaim_str_hashtbl;
+
+       BUG_ON(!nfs4_reclaim_init);
+       /* drain each bucket from the front, freeing name then record */
+       for (; bucket < &reclaim_str_hashtbl[CLIENT_HASH_SIZE]; bucket++) {
+               for (; !list_empty(bucket); reclaim_str_hashtbl_size--) {
+                       rec = list_entry(bucket->next,
+                                       struct nfs4_client_reclaim, cr_strhash);
+                       list_del(&rec->cr_strhash);
+                       kfree(rec->cr_name.data);
+                       kfree(rec);
+               }
+       }
+       BUG_ON(reclaim_str_hashtbl_size);
+}
+
+/*
+ * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
+struct nfs4_client_reclaim *
+nfs4_find_reclaim_client(clientid_t *clid)
+{
+       struct list_head *id_bucket, *name_bucket;
+       struct nfs4_client_reclaim *rec;
+       struct nfs4_client *clp;
+       unsigned int name_hash;
+
+       /*
+        * Step 1: find the confirmed client that carries this clientid.
+        */
+       id_bucket = &conf_id_hashtbl[clientid_hashval(clid->cl_id)];
+       list_for_each_entry(clp, id_bucket, cl_idhash)
+               if (cmp_clid(&clp->cl_clientid, clid))
+                       goto have_client;
+       return NULL;    /* clientid is not in conf_id_hashtbl */
+
+have_client:
+       /*
+        * Step 2: look for a reclaim record stored under that client's
+        * name in reclaim_str_hashtbl.
+        */
+       name_hash = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+       name_bucket = &reclaim_str_hashtbl[name_hash];
+       list_for_each_entry(rec, name_bucket, cr_strhash)
+               if (cmp_name(&rec->cr_name, &clp->cl_name))
+                       return rec;
+       return NULL;
+}
+
+/*
+ * Called from OPEN. Look for clientid in reclaim list.
+ */
+int
+nfs4_check_open_reclaim(clientid_t *clid)
+{
+       struct nfs4_client_reclaim *rec = nfs4_find_reclaim_client(clid);
+
+       /* no record for this client: there is nothing to reclaim */
+       if (rec == NULL)
+               return nfserr_reclaim_bad;
+       /* a record flagged cr_expired is refused with nfserr_no_grace */
+       return rec->cr_expired ? nfserr_no_grace : nfs_ok;
+}
+
+
  /* 
   * Start and stop routines
   */
@@ -2369,10 +3133,16 @@ void
  nfs4_state_init(void)
  {
         int i;
-       time_t start = get_seconds();
+       time_t grace_time;
  
         if (nfs4_init)
                 return;
+       if (!nfs4_reclaim_init) {
+               for (i = 0; i < CLIENT_HASH_SIZE; i++)
+                       INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+               reclaim_str_hashtbl_size = 0;
+               nfs4_reclaim_init = 1;
+       }
         for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                 INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -2399,33 +3169,46 @@ nfs4_state_init(void)
  
         INIT_LIST_HEAD(&close_lru);
         INIT_LIST_HEAD(&client_lru);
-       init_MUTEX(&client_sema);
-       boot_time = start;
-       grace_end = start + NFSD_LEASE_TIME;
+       INIT_LIST_HEAD(&del_recall_lru);
+       spin_lock_init(&recall_lock);
+       boot_time = get_seconds();
+       grace_time = max(old_lease_time, lease_time);
+       if (reclaim_str_hashtbl_size == 0)
+               grace_time = 0;
+       if (grace_time)
+               printk("NFSD: starting %ld-second grace period\n", grace_time);
+       grace_end = boot_time + grace_time;
         INIT_WORK(&laundromat_work,laundromat_main, NULL);
         schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
         nfs4_init = 1;
-
  }
  
  int
  nfs4_in_grace(void)
  {
-       return time_before(get_seconds(), (unsigned long)grace_end);
+       return get_seconds() < grace_end;
  }
  
-int
-nfs4_in_no_grace(void)
+void
+set_no_grace(void)
  {
-       return (grace_end < get_seconds());
+       printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
+       grace_end = get_seconds();
  }
  
+time_t
+nfs4_lease_time(void)
+{
+       return lease_time;      /* file-scope lease value, in seconds */
+}
  
  static void
  __nfs4_state_shutdown(void)
  {
         int i;
         struct nfs4_client *clp = NULL;
+       struct nfs4_delegation *dp = NULL;
+       struct list_head *pos, *next;
  
         for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                 while (!list_empty(&conf_id_hashtbl[i])) {
@@ -2437,6 +3220,14 @@ __nfs4_state_shutdown(void)
                         expire_client(clp);
                 }
         }
+       spin_lock(&recall_lock);
+       list_for_each_safe(pos, next, &del_recall_lru) {
+               dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+               release_delegation(dp);
+       }
+       spin_unlock(&recall_lock);
+
         release_all_files();
         cancel_delayed_work(&laundromat_work);
         flush_scheduled_work();
@@ -2451,12 +3242,70 @@ __nfs4_state_shutdown(void)
                         alloc_sowner, alloc_lsowner, free_sowner);
         dprintk("NFSD: vfsopen %d vfsclose %d\n",
                         vfsopen, vfsclose);
+       dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
+                       alloc_delegation, free_delegation);
+
  }
  
  void
  nfs4_state_shutdown(void)
  {
         nfs4_lock_state();
+       nfs4_release_reclaim();
         __nfs4_state_shutdown();
         nfs4_unlock_state();
  }
+
+/*
+ * Called when leasetime is changed.
+ *
+ * if nfsd is not started, simply set the global lease.
+ *
+ * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
+ * e.g: boot_time is reset, existing nfs4_client structs are
+ * used to fill reclaim_str_hashtbl, then all state (except for the
+ * reclaim_str_hashtbl) is re-initialized.
+ *
+ * if the old lease time is greater than the new lease time, the grace
+ * period needs to be set to the old lease time to allow clients to reclaim
+ * their state. XXX - we may want to set the grace period == lease time
+ * after an initial grace period == old lease time
+ *
+ * if an error occurs in this process, the new lease is set, but the server
+ * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
+ * which means OPEN/LOCK/READ/WRITE will fail during grace period.
+ *
+ * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
+ * OPEN and LOCK reclaims.
+ */
+void
+nfs4_reset_lease(time_t leasetime)
+{
+       struct nfs4_client *clp;
+       int i;
+
+       printk("NFSD: New leasetime %ld\n",leasetime);
+       nfs4_lock_state();
+       if (nfs4_init)
+               old_lease_time = lease_time;    /* feeds the grace period */
+       lease_time = leasetime; /* recorded even when nfsd is not started */
+       if (!nfs4_init)
+               goto out;       /* no nfsv4 state exists yet to reset */
+       nfs4_release_reclaim();
+       /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
+       for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+               list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
+                       if (!nfs4_client_to_reclaim(clp)) {
+                               nfs4_release_reclaim();
+                               goto init_state;
+                       }
+                       reclaim_str_hashtbl_size++;
+               }
+       }
+init_state:
+       __nfs4_state_shutdown();
+       nfs4_state_init();
+out:
+       nfs4_unlock_state();
+}
+