fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / fs / cifs / cifsfs.c
index fbc737e..10c9029 100644 (file)
@@ -32,6 +32,9 @@
 #include <linux/seq_file.h>
 #include <linux/vfs.h>
 #include <linux/mempool.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
@@ -50,24 +53,36 @@ int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
 unsigned int oplockEnabled = 1;
-unsigned int quotaEnabled = 0;
+unsigned int experimEnabled = 0;
 unsigned int linuxExtEnabled = 1;
 unsigned int lookupCacheEnabled = 1;
 unsigned int multiuser_mount = 0;
-unsigned int extended_security = 0;
-unsigned int ntlmv2_support = 0;
+unsigned int extended_security = CIFSSEC_DEF;
+/* unsigned int ntlmv2_support = 0; */
 unsigned int sign_CIFS_PDUs = 1;
-unsigned int CIFSMaximumBufferSize = CIFS_MAX_MSGSIZE;
+extern struct task_struct * oplockThread; /* remove sparse warning */
 struct task_struct * oplockThread = NULL;
-
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
-                       const char *);
-extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
-void cifs_proc_init(void);
-void cifs_proc_clean(void);
-
-static DECLARE_COMPLETION(cifs_oplock_exited);
-
+extern struct task_struct * dnotifyThread; /* remove sparse warning */
+struct task_struct * dnotifyThread = NULL;
+static struct super_operations cifs_super_ops; 
+unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; /* clamped to 8192..130048 in cifs_init_request_bufs() */
+module_param(CIFSMaxBufSize, uint, 0); /* param type matches the unsigned variable */
+MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048");
+unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL; /* clamped to 1..64 in cifs_init_request_bufs() */
+module_param(cifs_min_rcv, uint, 0);
+MODULE_PARM_DESC(cifs_min_rcv,"Network buffers in pool. Default: 4 Range: 1 to 64");
+unsigned int cifs_min_small = 30; /* clamped to 2..256 in cifs_init_request_bufs() */
+module_param(cifs_min_small, uint, 0);
+MODULE_PARM_DESC(cifs_min_small,"Small network buffers in pool. Default: 30 Range: 2 to 256");
+unsigned int cifs_max_pending = CIFS_MAX_REQ; /* clamped to 2..256 in init_cifs() */
+module_param(cifs_max_pending, uint, 0);
+MODULE_PARM_DESC(cifs_max_pending,"Simultaneous requests to server. Default: 50 Range: 2 to 256");
+
+extern mempool_t *cifs_sm_req_poolp;
+extern mempool_t *cifs_req_poolp;
+extern mempool_t *cifs_mid_poolp;
+
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static int
 cifs_read_super(struct super_block *sb, void *data,
@@ -78,13 +93,10 @@ cifs_read_super(struct super_block *sb, void *data,
        int rc = 0;
 
        sb->s_flags |= MS_NODIRATIME; /* and probably even noatime */
-       sb->s_fs_info = kmalloc(sizeof(struct cifs_sb_info),GFP_KERNEL);
+       sb->s_fs_info = kzalloc(sizeof(struct cifs_sb_info),GFP_KERNEL);
        cifs_sb = CIFS_SB(sb);
        if(cifs_sb == NULL)
                return -ENOMEM;
-       else
-               memset(cifs_sb,0,sizeof(struct cifs_sb_info));
-       
 
        rc = cifs_mount(sb, cifs_sb, data, devname);
 
@@ -156,9 +168,11 @@ cifs_put_super(struct super_block *sb)
 }
 
 static int
-cifs_statfs(struct super_block *sb, struct kstatfs *buf)
+cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-       int xid, rc;
+       struct super_block *sb = dentry->d_sb;
+       int xid; 
+       int rc = -EOPNOTSUPP;
        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
 
@@ -170,22 +184,38 @@ cifs_statfs(struct super_block *sb, struct kstatfs *buf)
        buf->f_type = CIFS_MAGIC_NUMBER;
 
        /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */
-       buf->f_namelen = PATH_MAX;      /* PATH_MAX may be too long - it would presumably
-                                          be length of total path, note that some servers may be 
-                                          able to support more than this, but best to be safe
-                                          since Win2k and others can not handle very long filenames */
+       buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would 
+                                     presumably be total path, but note
+                                     that some servers (including Samba 3)
+                                     have a shorter maximum path */
        buf->f_files = 0;       /* undefined */
        buf->f_ffree = 0;       /* unlimited */
 
-       rc = CIFSSMBQFSInfo(xid, pTcon, buf, cifs_sb->local_nls);
-
+/* BB we could add a second check for a QFS Unix capability bit */
+/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
+    if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
+                       le64_to_cpu(pTcon->fsUnixInfo.Capability)))
+           rc = CIFSSMBQFSPosixInfo(xid, pTcon, buf);
+
+    /* Only need to call the old QFSInfo if failed
+    on newer one */
+    if(rc)
+       if(pTcon->ses->capabilities & CAP_NT_SMBS)
+               rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */
+
+       /* Some old Windows servers also do not support level 103, retry with
+          older level one if old server failed the previous call or we
+          bypassed it because we detected that this was an older LANMAN sess */
+       if(rc)
+               rc = SMBOldQFSInfo(xid, pTcon, buf);
        /*     
           int f_type;
           __fsid_t f_fsid;
           int f_namelen;  */
-       /* BB get from info put in tcon struct at mount time with call to QFSAttrInfo */
+       /* BB get from info in tcon struct at mount time call to QFSAttrInfo */
        FreeXid(xid);
-       return 0;               /* always return success? what if volume is no longer available? */
+       return 0;               /* always return success? what if volume is no
+                                  longer available? */
 }
 
 static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
@@ -200,13 +230,15 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
                on the client (above and beyond ACL on servers) for  
                servers which do not support setting and viewing mode bits,
                so allowing client to check permissions is useful */ 
-               return vfs_permission(inode, mask);
+               return generic_permission(inode, mask, NULL);
 }
 
-static kmem_cache_t *cifs_inode_cachep;
-static kmem_cache_t *cifs_req_cachep;
-static kmem_cache_t *cifs_mid_cachep;
-kmem_cache_t *cifs_oplock_cachep;
+static struct kmem_cache *cifs_inode_cachep;
+static struct kmem_cache *cifs_req_cachep;
+static struct kmem_cache *cifs_mid_cachep;
+struct kmem_cache *cifs_oplock_cachep;
+static struct kmem_cache *cifs_sm_req_cachep;
+mempool_t *cifs_sm_req_poolp;
 mempool_t *cifs_req_poolp;
 mempool_t *cifs_mid_poolp;
 
@@ -214,9 +246,7 @@ static struct inode *
 cifs_alloc_inode(struct super_block *sb)
 {
        struct cifsInodeInfo *cifs_inode;
-       cifs_inode =
-           (struct cifsInodeInfo *) kmem_cache_alloc(cifs_inode_cachep,
-                                                     SLAB_KERNEL);
+       cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
        if (!cifs_inode)
                return NULL;
        cifs_inode->cifsAttrs = 0x20;   /* default */
@@ -227,9 +257,8 @@ cifs_alloc_inode(struct super_block *sb)
        file data or metadata */
        cifs_inode->clientCanCacheRead = FALSE;
        cifs_inode->clientCanCacheAll = FALSE;
-       cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE;
        cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
-
+       cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;
        INIT_LIST_HEAD(&cifs_inode->openFileList);
        return &cifs_inode->vfs_inode;
 }
@@ -255,12 +284,14 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
        if (cifs_sb) {
                if (cifs_sb->tcon) {
                        seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
-                       if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->userName))
-                               seq_printf(s, ",username=%s",
+                       if (cifs_sb->tcon->ses) {
+                               if (cifs_sb->tcon->ses->userName)
+                                       seq_printf(s, ",username=%s",
                                           cifs_sb->tcon->ses->userName);
-                       if(cifs_sb->tcon->ses->domainName)
-                               seq_printf(s, ",domain=%s",
-                                       cifs_sb->tcon->ses->domainName);
+                               if(cifs_sb->tcon->ses->domainName)
+                                       seq_printf(s, ",domain=%s",
+                                          cifs_sb->tcon->ses->domainName);
+                       }
                }
                seq_printf(s, ",rsize=%d",cifs_sb->rsize);
                seq_printf(s, ",wsize=%d",cifs_sb->wsize);
@@ -372,13 +403,57 @@ static struct quotactl_ops cifs_quotactl_ops = {
 };
 #endif
 
+/* umount_begin hook: on a forced unmount (MNT_FORCE) mark the tree connection
+   as exiting (if useCount is 1) and wake tasks waiting on its server queues */
+static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
+{
+       struct cifs_sb_info *sbi;
+       struct cifsTconInfo *tree;
+
+       if ((flags & MNT_FORCE) == 0)
+               return;
+       sbi = CIFS_SB(vfsmnt->mnt_sb);
+       if (!sbi)
+               return;
+       tree = sbi->tcon;
+       if (!tree)
+               return;
+
+       /* set the exiting status only while useCount is exactly 1 */
+       down(&tree->tconSem);
+       if (atomic_read(&tree->useCount) == 1)
+               tree->tidStatus = CifsExiting;
+       up(&tree->tconSem);
+
+       /* cancel_brl_requests(tree); */ /* BB mark all brl mids as exiting */
+       /* cancel_notify_requests(tree); */
+       if (tree->ses && tree->ses->server) {
+               cFYI(1,("wake up tasks now - umount begin not complete"));
+               wake_up_all(&tree->ses->server->request_q);
+               wake_up_all(&tree->ses->server->response_q);
+               msleep(1); /* yield */
+               /* kick the response waiters a second time */
+               wake_up_all(&tree->ses->server->response_q);
+               msleep(1);
+       }
+       /* BB FIXME - finish add checks for tidStatus BB */
+}
+
+#ifdef CONFIG_CIFS_STATS2
+static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt)
+{
+       /* BB FIXME - stub: nothing is written to the seq_file yet */
+       return 0;
+}
+#endif
+
 static int cifs_remount(struct super_block *sb, int *flags, char *data)
 {
        *flags |= MS_NODIRATIME;
        return 0;
 }
 
-struct super_operations cifs_super_ops = {
+static struct super_operations cifs_super_ops = {
        .read_inode = cifs_read_inode,
        .put_super = cifs_put_super,
        .statfs = cifs_statfs,
@@ -389,13 +464,16 @@ struct super_operations cifs_super_ops = {
    unless later we add lazy close of inodes or unless the kernel forgets to call
    us with the same number of releases (closes) as opens */
        .show_options = cifs_show_options,
-/*    .umount_begin   = cifs_umount_begin, *//* consider adding in the future */
+       .umount_begin   = cifs_umount_begin,
        .remount_fs = cifs_remount,
+#ifdef CONFIG_CIFS_STATS2
+       .show_stats = cifs_show_stats,
+#endif
 };
 
-static struct super_block *
+static int
 cifs_get_sb(struct file_system_type *fs_type,
-           int flags, const char *dev_name, void *data)
+           int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
        int rc;
        struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
@@ -403,75 +481,43 @@ cifs_get_sb(struct file_system_type *fs_type,
        cFYI(1, ("Devname: %s flags: %d ", dev_name, flags));
 
        if (IS_ERR(sb))
-               return sb;
+               return PTR_ERR(sb);
 
        sb->s_flags = flags;
 
-       rc = cifs_read_super(sb, data, dev_name, flags & MS_VERBOSE ? 1 : 0);
+       rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
        if (rc) {
                up_write(&sb->s_umount);
                deactivate_super(sb);
-               return ERR_PTR(rc);
+               return rc;
        }
        sb->s_flags |= MS_ACTIVE;
-       return sb;
+       return simple_set_mnt(mnt, sb);
 }
 
-static ssize_t
-cifs_read_wrapper(struct file * file, char __user *read_data, size_t read_size,
-          loff_t * poffset)
+static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+                                  unsigned long nr_segs, loff_t pos)
 {
-       if(file == NULL)
-               return -EIO;
-       else if(file->f_dentry == NULL)
-               return -EIO;
-       else if(file->f_dentry->d_inode == NULL)
-               return -EIO;
+       struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+       ssize_t written;
 
-       cFYI(1,("In read_wrapper size %zd at %lld",read_size,*poffset));
-       if(CIFS_I(file->f_dentry->d_inode)->clientCanCacheRead) {
-               return generic_file_read(file,read_data,read_size,poffset);
-       } else {
-               /* BB do we need to lock inode from here until after invalidate? */
-/*             if(file->f_dentry->d_inode->i_mapping) {
-                       filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-                       filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
-               }*/
-/*             cifs_revalidate(file->f_dentry);*/ /* BB fixme */
-
-               /* BB we should make timer configurable - perhaps 
-                  by simply calling cifs_revalidate here */
-               /* invalidate_remote_inode(file->f_dentry->d_inode);*/
-               return generic_file_read(file,read_data,read_size,poffset);
-       }
+       written = generic_file_aio_write(iocb, iov, nr_segs, pos); /* generic write path first */
+       if (!CIFS_I(inode)->clientCanCacheAll) /* flag clear: flush the mapping right away */
+               filemap_fdatawrite(inode->i_mapping); /* return value is not checked */
+       return written; /* always the generic write result, whatever the flush did */
 }
 
-static ssize_t
-cifs_write_wrapper(struct file * file, const char __user *write_data,
-           size_t write_size, loff_t * poffset) 
+static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
-       ssize_t written;
-
-       if(file == NULL)
-               return -EIO;
-       else if(file->f_dentry == NULL)
-               return -EIO;
-       else if(file->f_dentry->d_inode == NULL)
-               return -EIO;
-
-       cFYI(1,("In write_wrapper size %zd at %lld",write_size,*poffset));
-
-       /* check whether we can cache writes locally */
-       written = generic_file_write(file,write_data,write_size,poffset);
-       if(!CIFS_I(file->f_dentry->d_inode)->clientCanCacheAll)  {
-               if(file->f_dentry->d_inode->i_mapping) {
-                       filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-               }
+       /* origin == SEEK_END => we must revalidate the cached file length */
+       if (origin == SEEK_END) {
+               int retval = cifs_revalidate(file->f_path.dentry);
+               if (retval < 0) /* revalidation failed: report it instead of seeking */
+                       return (loff_t)retval;
         }
-       return written;
+       return remote_llseek(file, offset, origin); /* the seek itself is delegated */
 }
 
-
 static struct file_system_type cifs_fs_type = {
        .owner = THIS_MODULE,
        .name = "cifs",
@@ -493,6 +539,12 @@ struct inode_operations cifs_dir_inode_ops = {
        .setattr = cifs_setattr,
        .symlink = cifs_symlink,
        .mknod   = cifs_mknod,
+#ifdef CONFIG_CIFS_XATTR
+       .setxattr = cifs_setxattr,
+       .getxattr = cifs_getxattr,
+       .listxattr = cifs_listxattr,
+       .removexattr = cifs_removexattr,
+#endif
 };
 
 struct inode_operations cifs_file_inode_ops = {
@@ -510,8 +562,9 @@ struct inode_operations cifs_file_inode_ops = {
 };
 
 struct inode_operations cifs_symlink_inode_ops = {
-       .readlink = cifs_readlink,
+       .readlink = generic_readlink, 
        .follow_link = cifs_follow_link,
+       .put_link = cifs_put_link,
        .permission = cifs_permission,
        /* BB add the following two eventually */
        /* revalidate: cifs_revalidate,
@@ -524,30 +577,101 @@ struct inode_operations cifs_symlink_inode_ops = {
 #endif 
 };
 
-struct file_operations cifs_file_ops = {
-       .read = cifs_read_wrapper,
-       .write = cifs_write_wrapper, 
+const struct file_operations cifs_file_ops = {
+       .read = do_sync_read,
+       .write = do_sync_write,
+       .aio_read = generic_file_aio_read,
+       .aio_write = cifs_file_aio_write, /* generic write plus an optional flush */
+       .open = cifs_open,
+       .release = cifs_close,
+       .lock = cifs_lock, /* the *_nobrl_ops tables omit this entry */
+       .fsync = cifs_fsync,
+       .flush = cifs_flush,
+       .mmap  = cifs_file_mmap,
+       .sendfile = generic_file_sendfile,
+       .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_POSIX
+       .ioctl  = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+       .dir_notify = cifs_dir_notify,
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
+};
+
+const struct file_operations cifs_file_direct_ops = {
+       /* read/write use cifs_user_read/cifs_user_write: no mmap, no aio, no
+          readv - BB reevaluate whether they can be done with directio, no cache */
+       .read = cifs_user_read,
+       .write = cifs_user_write,
         .open = cifs_open,
         .release = cifs_close,
         .lock = cifs_lock,
         .fsync = cifs_fsync,
         .flush = cifs_flush,
+       .sendfile = generic_file_sendfile, /* BB removeme BB */
+#ifdef CONFIG_CIFS_POSIX
+       .ioctl  = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+       .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+       .dir_notify = cifs_dir_notify,
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
+};
+const struct file_operations cifs_file_nobrl_ops = { /* cifs_file_ops without .lock */
+       .read = do_sync_read,
+       .write = do_sync_write,
+       .aio_read = generic_file_aio_read,
+       .aio_write = cifs_file_aio_write,
+       .open = cifs_open,
+       .release = cifs_close,
+       .fsync = cifs_fsync,
+       .flush = cifs_flush,
         .mmap  = cifs_file_mmap,
         .sendfile = generic_file_sendfile,
+       .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_POSIX
+       .ioctl  = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+       .dir_notify = cifs_dir_notify,
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
+};
+
+const struct file_operations cifs_file_direct_nobrl_ops = {
+       /* cifs_file_direct_ops without .lock: no mmap, no aio, no readv - 
+          BB reevaluate whether they can be done with directio, no cache */
+       .read = cifs_user_read,
+       .write = cifs_user_write,
+       .open = cifs_open,
+       .release = cifs_close,
+       .fsync = cifs_fsync,
+       .flush = cifs_flush,
+       .sendfile = generic_file_sendfile, /* BB removeme BB */
+#ifdef CONFIG_CIFS_POSIX
+       .ioctl  = cifs_ioctl,
+#endif /* CONFIG_CIFS_POSIX */
+       .llseek = cifs_llseek,
+#ifdef CONFIG_CIFS_EXPERIMENTAL
         .dir_notify = cifs_dir_notify,
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
 
-struct file_operations cifs_dir_ops = {
+const struct file_operations cifs_dir_ops = {
         .readdir = cifs_readdir,
         .release = cifs_closedir,
         .read    = generic_read_dir,
+#ifdef CONFIG_CIFS_EXPERIMENTAL
         .dir_notify = cifs_dir_notify,
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
+        .ioctl  = cifs_ioctl, /* NOTE(review): unconditional here, but under CONFIG_CIFS_POSIX in the file tables - confirm */
 };
 
 static void
-cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags)
+cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
 {
-       struct cifsInodeInfo *cifsi = (struct cifsInodeInfo *) inode;
+       struct cifsInodeInfo *cifsi = inode;
 
        if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR) {
@@ -561,7 +685,8 @@ cifs_init_inodecache(void)
 {
        cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
                                              sizeof (struct cifsInodeInfo),
-                                             0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+                                             0, (SLAB_RECLAIM_ACCOUNT|
+                                               SLAB_MEM_SPREAD),
                                              cifs_init_once, NULL);
        if (cifs_inode_cachep == NULL)
                return -ENOMEM;
@@ -572,29 +697,76 @@ cifs_init_inodecache(void)
 static void
 cifs_destroy_inodecache(void)
 {
-       if (kmem_cache_destroy(cifs_inode_cachep))
-               printk(KERN_WARNING "cifs_inode_cache: error freeing\n");
+       kmem_cache_destroy(cifs_inode_cachep); /* result is no longer checked or logged */
 }
 
 static int
 cifs_init_request_bufs(void)
 {
+       if(CIFSMaxBufSize < 8192) { /* sanitize the CIFSMaxBufSize module parameter */
+       /* Buffer size can not be smaller than 2 * PATH_MAX since maximum
+       Unicode path name has to fit in any SMB/CIFS path based frames */
+               CIFSMaxBufSize = 8192;
+       } else if (CIFSMaxBufSize > 1024*127) {
+               CIFSMaxBufSize = 1024 * 127;
+       } else {
+               CIFSMaxBufSize &= 0x1FE00; /* round down to a multiple of 512 bytes */
+       }
+/*     cERROR(1,("CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize)); */
         cifs_req_cachep = kmem_cache_create("cifs_request",
-                                           CIFS_MAX_MSGSIZE +
+                                           CIFSMaxBufSize +
                                             MAX_CIFS_HDR_SIZE, 0,
                                             SLAB_HWCACHE_ALIGN, NULL, NULL);
         if (cifs_req_cachep == NULL)
                 return -ENOMEM;
 
-       cifs_req_poolp = mempool_create(CIFS_MIN_RCV_POOL,
-                                       mempool_alloc_slab,
-                                       mempool_free_slab,
-                                       cifs_req_cachep);
+       if(cifs_min_rcv < 1) /* clamp the large-buffer pool size to 1..64 */
+               cifs_min_rcv = 1;
+       else if (cifs_min_rcv > 64) {
+               cifs_min_rcv = 64;
+               cERROR(1,("cifs_min_rcv set to maximum (64)"));
+       }
+
+       cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
+                                                 cifs_req_cachep);
 
         if(cifs_req_poolp == NULL) {
                 kmem_cache_destroy(cifs_req_cachep);
                 return -ENOMEM;
         }
+       /* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and
+       almost all handle based requests (but not write response, nor is it
+       sufficient for path based requests).  A smaller size would have
+       been more efficient (compacting multiple slab items on one 4k page) 
+       for the case in which debug was on, but this larger size allows
+       more SMBs to use small buffer alloc and is still much more
+       efficient to alloc 1 per page off the slab compared to 17K (5page) 
+       alloc of large cifs buffers even when page debugging is on */
+       cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
+                       MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, 
+                       NULL, NULL);
+       if (cifs_sm_req_cachep == NULL) {
+               mempool_destroy(cifs_req_poolp); /* unwind the large-buffer pool and cache */
+               kmem_cache_destroy(cifs_req_cachep);
+               return -ENOMEM;              
+       }
+
+       if(cifs_min_small < 2) /* clamp the small-buffer pool size to 2..256 */
+               cifs_min_small = 2;
+       else if (cifs_min_small > 256) {
+               cifs_min_small = 256;
+               cFYI(1,("cifs_min_small set to maximum (256)"));
+       }
+
+       cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
+                                                    cifs_sm_req_cachep);
+
+       if(cifs_sm_req_poolp == NULL) {
+               mempool_destroy(cifs_req_poolp); /* unwind everything created above */
+               kmem_cache_destroy(cifs_req_cachep);
+               kmem_cache_destroy(cifs_sm_req_cachep);
+               return -ENOMEM;
+       }
 
         return 0;
 }
@@ -603,9 +775,9 @@ static void
 cifs_destroy_request_bufs(void)
 {
        mempool_destroy(cifs_req_poolp);
-       if (kmem_cache_destroy(cifs_req_cachep))
-               printk(KERN_WARNING
-                      "cifs_destroy_request_cache: error not all structures were freed\n");
+       kmem_cache_destroy(cifs_req_cachep);
+       mempool_destroy(cifs_sm_req_poolp);
+       kmem_cache_destroy(cifs_sm_req_cachep);
 }
 
 static int
@@ -617,10 +789,8 @@ cifs_init_mids(void)
        if (cifs_mid_cachep == NULL)
                return -ENOMEM;
 
-       cifs_mid_poolp = mempool_create(3 /* a reasonable min simultan opers */,
-                                       mempool_alloc_slab,
-                                       mempool_free_slab,
-                                       cifs_mid_cachep);
+       /* 3 is a reasonable minimum number of simultaneous operations */
+       cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep);
        if(cifs_mid_poolp == NULL) {
                kmem_cache_destroy(cifs_mid_cachep);
                return -ENOMEM;
@@ -642,13 +812,8 @@ static void
 cifs_destroy_mids(void)
 {
        mempool_destroy(cifs_mid_poolp);
-       if (kmem_cache_destroy(cifs_mid_cachep))
-               printk(KERN_WARNING
-                      "cifs_destroy_mids: error not all structures were freed\n");
-
-       if (kmem_cache_destroy(cifs_oplock_cachep))
-               printk(KERN_WARNING
-                      "error not all oplock structures were freed\n");
+       kmem_cache_destroy(cifs_mid_cachep);
+       kmem_cache_destroy(cifs_oplock_cachep);
 }
 
 static int cifs_oplock_thread(void * dummyarg)
@@ -659,14 +824,10 @@ static int cifs_oplock_thread(void * dummyarg)
        __u16  netfid;
        int rc;
 
-       daemonize("cifsoplockd");
-       allow_signal(SIGTERM);
-
-       oplockThread = current;
        do {
-               set_current_state(TASK_INTERRUPTIBLE);
+               if (try_to_freeze()) 
+                       continue;
                
-               schedule_timeout(1*HZ);  
                spin_lock(&GlobalMid_Lock);
                if(list_empty(&GlobalOplock_Q)) {
                        spin_unlock(&GlobalMid_Lock);
@@ -684,9 +845,9 @@ static int cifs_oplock_thread(void * dummyarg)
                                DeleteOplockQEntry(oplock_item);
                                /* can not grab inode sem here since it would
                                deadlock when oplock received on delete 
-                               since vfs_unlink holds the i_sem across
+                               since vfs_unlink holds the i_mutex across
                                the call */
-                               /* down(&inode->i_sem);*/
+                               /* mutex_lock(&inode->i_mutex);*/
                                if (S_ISREG(inode->i_mode)) {
                                        rc = filemap_fdatawrite(inode->i_mapping);
                                        if(CIFS_I(inode)->clientCanCacheRead == 0) {
@@ -695,7 +856,7 @@ static int cifs_oplock_thread(void * dummyarg)
                                        }
                                } else
                                        rc = 0;
-                               /* up(&inode->i_sem);*/
+                               /* mutex_unlock(&inode->i_mutex);*/
                                if (rc)
                                        CIFS_I(inode)->write_behind_rc = rc;
                                cFYI(1,("Oplock flush inode %p rc %d",inode,rc));
@@ -715,9 +876,39 @@ static int cifs_oplock_thread(void * dummyarg)
                                }
                        } else
                                spin_unlock(&GlobalMid_Lock);
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(1);  /* yield in case q were corrupt */
                }
-       } while(!signal_pending(current));
-       complete_and_exit (&cifs_oplock_exited, 0);
+       } while (!kthread_should_stop());
+
+       return 0;
+}
+
+/* Kernel thread body: after each sleep of up to 15 seconds, wake response_q
+   waiters on every session whose server still has requests in flight */
+static int cifs_dnotify_thread(void * dummyarg)
+{
+       struct cifsSesInfo *ses;
+
+       do {
+               if (try_to_freeze())
+                       continue;
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(15*HZ);
+               read_lock(&GlobalSMBSeslock);
+               /* a request may be stuck waiting; waking the queue
+                  lets its task wake up and error out */
+               list_for_each_entry(ses, &GlobalSMBSessionList,
+                                   cifsSessionList) {
+                       if (!ses->server)
+                               continue;
+                       if (atomic_read(&ses->server->inFlight))
+                               wake_up_all(&ses->server->response_q);
+               }
+               read_unlock(&GlobalSMBSeslock);
+       } while (!kthread_should_stop());
+
+       return 0;
 }
 
 static int __init
@@ -727,10 +918,14 @@ init_cifs(void)
 #ifdef CONFIG_PROC_FS
        cifs_proc_init();
 #endif
-       INIT_LIST_HEAD(&GlobalServerList);      /* BB not implemented yet */
+/*     INIT_LIST_HEAD(&GlobalServerList);*/    /* BB not implemented yet */
        INIT_LIST_HEAD(&GlobalSMBSessionList);
        INIT_LIST_HEAD(&GlobalTreeConnectionList);
        INIT_LIST_HEAD(&GlobalOplock_Q);
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+       INIT_LIST_HEAD(&GlobalDnotifyReqList);
+       INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
+#endif 
 /*
  *  Initialize Global counters
  */
@@ -741,31 +936,71 @@ init_cifs(void)
        atomic_set(&tconInfoReconnectCount, 0);
 
        atomic_set(&bufAllocCount, 0);
+       atomic_set(&smBufAllocCount, 0);
+#ifdef CONFIG_CIFS_STATS2
+       atomic_set(&totBufAllocCount, 0);
+       atomic_set(&totSmBufAllocCount, 0);
+#endif /* CONFIG_CIFS_STATS2 */
+
        atomic_set(&midCount, 0);
        GlobalCurrentXid = 0;
        GlobalTotalActiveXid = 0;
        GlobalMaxActiveXid = 0;
-       GlobalSMBSeslock = RW_LOCK_UNLOCKED;
-       GlobalMid_Lock = SPIN_LOCK_UNLOCKED;
+       memset(Local_System_Name, 0, 15);
+       rwlock_init(&GlobalSMBSeslock);
+       spin_lock_init(&GlobalMid_Lock);
+
+       if(cifs_max_pending < 2) {
+               cifs_max_pending = 2;
+               cFYI(1,("cifs_max_pending set to min of 2"));
+       } else if(cifs_max_pending > 256) {
+               cifs_max_pending = 256;
+               cFYI(1,("cifs_max_pending set to max of 256"));
+       }
 
        rc = cifs_init_inodecache();
-       if (!rc) {
-               rc = cifs_init_mids();
-               if (!rc) {
-                       rc = cifs_init_request_bufs();
-                       if (!rc) {
-                               rc = register_filesystem(&cifs_fs_type);
-                               if (!rc) {                
-                                       kernel_thread(cifs_oplock_thread, NULL, 
-                                               CLONE_FS | CLONE_FILES | CLONE_VM);
-                                       return rc; /* Success */
-                               } else
-                                       cifs_destroy_request_bufs();
-                       }
-                       cifs_destroy_mids();
-               }
-               cifs_destroy_inodecache();
+       if (rc)
+               goto out_clean_proc;
+
+       rc = cifs_init_mids();
+       if (rc)
+               goto out_destroy_inodecache;
+
+       rc = cifs_init_request_bufs();
+       if (rc)
+               goto out_destroy_mids;
+
+       rc = register_filesystem(&cifs_fs_type);
+       if (rc)
+               goto out_destroy_request_bufs;
+
+       oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
+       if (IS_ERR(oplockThread)) {
+               rc = PTR_ERR(oplockThread);
+               cERROR(1,("error %d create oplock thread", rc));
+               goto out_unregister_filesystem;
+       }
+
+       dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
+       if (IS_ERR(dnotifyThread)) {
+               rc = PTR_ERR(dnotifyThread);
+               cERROR(1,("error %d create dnotify thread", rc));
+               goto out_stop_oplock_thread;
        }
+
+       return 0;
+
+ out_stop_oplock_thread:
+       kthread_stop(oplockThread);
+ out_unregister_filesystem:
+       unregister_filesystem(&cifs_fs_type);
+ out_destroy_request_bufs:
+       cifs_destroy_request_bufs();
+ out_destroy_mids:
+       cifs_destroy_mids();
+ out_destroy_inodecache:
+       cifs_destroy_inodecache();
+ out_clean_proc:
 #ifdef CONFIG_PROC_FS
        cifs_proc_clean();
 #endif
@@ -783,10 +1018,8 @@ exit_cifs(void)
        cifs_destroy_inodecache();
        cifs_destroy_mids();
        cifs_destroy_request_bufs();
-       if(oplockThread) {
-               send_sig(SIGTERM, oplockThread, 1);
-               wait_for_completion(&cifs_oplock_exited);
-       }
+       kthread_stop(oplockThread);
+       kthread_stop(dnotifyThread);
 }
 
 MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");