vserver 1.9.3
[linux-2.6.git] / fs / xfs / linux-2.6 / xfs_super.c
index 3a2e961..eb81476 100644 (file)
@@ -66,6 +66,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_version.h"
+#include "xfs_ioctl32.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -140,7 +141,7 @@ xfs_set_inodeops(
        vnode_t                 *vp = LINVFS_GET_VP(inode);
 
        if (vp->v_type == VNON) {
-               make_bad_inode(inode);
+               vn_mark_bad(vp);
        } else if (S_ISREG(inode->i_mode)) {
                inode->i_op = &linvfs_file_inode_operations;
                inode->i_fop = &linvfs_file_operations;
@@ -230,42 +231,21 @@ xfs_initialize_vnode(
                bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
        }
 
-       vp->v_type = IFTOVT(ip->i_d.di_mode);
-
-       /* Have we been called during the new inode create process,
-        * in which case we are too early to fill in the Linux inode.
-        */
-       if (vp->v_type == VNON)
-               return;
-
-       xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
-
-       /* For new inodes we need to set the ops vectors,
-        * and unlock the inode.
+       /*
+        * We need to set the ops vectors, and unlock the inode, but if
+        * we have been called during the new inode create process, it is
+        * too early to fill in the Linux inode.  We will get called a
+        * second time once the inode is properly set up, and then we can
+        * finish our work.
         */
-       if (unlock && (inode->i_state & I_NEW)) {
+       if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
+               vp->v_type = IFTOVT(ip->i_d.di_mode);
+               xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
                xfs_set_inodeops(inode);
                unlock_new_inode(inode);
        }
 }
 
-void
-xfs_flush_inode(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));
-
-       filemap_flush(inode->i_mapping);
-}
-
-void
-xfs_flush_device(
-       xfs_inode_t     *ip)
-{
-       sync_blockdev(XFS_ITOV(ip)->v_vfsp->vfs_super->s_bdev);
-       xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
 int
 xfs_blkdev_get(
        xfs_mount_t             *mp,
@@ -319,7 +299,6 @@ xfs_inode_shake(
 {
        int             pages;
 
-       
        pages = kmem_zone_shrink(linvfs_inode_zone);
        pages += kmem_zone_shrink(xfs_inode_zone);
        return pages;
@@ -342,10 +321,8 @@ STATIC int
 init_inodecache( void )
 {
        linvfs_inode_zone = kmem_cache_create("linvfs_icache",
-                               sizeof(vnode_t), 0,
-                               SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+                               sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
                                init_once, NULL);
-
        if (linvfs_inode_zone == NULL)
                return -ENOMEM;
        return 0;
@@ -364,13 +341,13 @@ destroy_inodecache( void )
  * at the point when it is unpinned after a log write,
  * since this is when the inode itself becomes flushable. 
  */
-STATIC void
+STATIC int
 linvfs_write_inode(
        struct inode            *inode,
        int                     sync)
 {
        vnode_t                 *vp = LINVFS_GET_VP(inode);
-       int                     error, flags = FLUSH_INODE;
+       int                     error = 0, flags = FLUSH_INODE;
 
        if (vp) {
                vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -378,6 +355,8 @@ linvfs_write_inode(
                        flags |= FLUSH_SYNC;
                VOP_IFLUSH(vp, flags, error);
        }
+
+       return -error;
 }
 
 STATIC void
@@ -397,36 +376,152 @@ linvfs_clear_inode(
 }
 
 
+/*
+ * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+ * Doing this has two advantages:
+ * - It saves on stack space, which is tight in certain situations
+ * - It can be used (with care) as a mechanism to avoid deadlocks.
+ * Flushing while allocating in a full filesystem requires both.
+ */
+STATIC void
+xfs_syncd_queue_work(
+       struct vfs      *vfs,           /* mount whose sync list and sync task receive the item */
+       void            *data,          /* opaque argument, stored in w_data and handed to syncer */
+       void            (*syncer)(vfs_t *, void *))     /* callback, stored in w_syncer */
+{
+       vfs_sync_work_t *work;
+
+       work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);      /* result is used without a NULL check */
+       INIT_LIST_HEAD(&work->w_list);
+       work->w_syncer = syncer;
+       work->w_data = data;
+       work->w_vfs = vfs;
+       spin_lock(&vfs->vfs_sync_lock);         /* xfssyncd takes the same lock when it drains the list */
+       list_add_tail(&work->w_list, &vfs->vfs_sync_list);      /* append at the tail of the pending list */
+       spin_unlock(&vfs->vfs_sync_lock);
+       wake_up_process(vfs->vfs_sync_task);    /* kick xfssyncd, which runs w_syncer and then frees the item */
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room...
+ */
+STATIC void
+xfs_flush_inode_work(
+       vfs_t           *vfs,           /* not used here; present to match the w_syncer signature */
+       void            *inode)         /* really a struct inode *, queued by xfs_flush_inode */
+{
+       filemap_flush(((struct inode *)inode)->i_mapping);      /* flush the inode's mapping */
+       iput((struct inode *)inode);    /* drop the reference taken by igrab() in xfs_flush_inode */
+}
+
+void
+xfs_flush_inode(
+       xfs_inode_t     *ip)            /* XFS inode whose mapping should be flushed via xfssyncd */
+{
+       struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));   /* Linux inode behind ip's vnode */
+       struct vfs      *vfs = XFS_MTOVFS(ip->i_mount);         /* vfs that owns the sync work list */
+
+       igrab(inode);   /* released by iput() in xfs_flush_inode_work; NOTE(review): return value ignored - confirm callers hold a live inode */
+       xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); /* the flush itself runs in xfssyncd, not on this stack */
+       delay(HZ/2);    /* pause for HZ/2 ticks (half a second); nothing here waits for the queued work to complete */
+}
+
+/*
+ * This is the "bigger hammer" version of xfs_flush_inode_work...
+ * (IOW, "If at first you don't succeed, use a Bigger Hammer").
+ */
+STATIC void
+xfs_flush_device_work(
+       vfs_t           *vfs,           /* mount whose backing block device is synced */
+       void            *inode)         /* really a struct inode *; only released here, never flushed */
+{
+       sync_blockdev(vfs->vfs_super->s_bdev);  /* sync the block device behind the superblock */
+       iput((struct inode *)inode);    /* drop the reference taken by igrab() in xfs_flush_device */
+}
+
+void
+xfs_flush_device(
+       xfs_inode_t     *ip)            /* inode identifying the mount (ip->i_mount) to flush */
+{
+       struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));   /* Linux inode behind ip's vnode */
+       struct vfs      *vfs = XFS_MTOVFS(ip->i_mount);         /* vfs that owns the sync work list */
+
+       igrab(inode);   /* released by iput() in xfs_flush_device_work; return value is not checked */
+       xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);        /* the device sync runs in xfssyncd */
+       delay(HZ/2);    /* pause for HZ/2 ticks (half a second); does not wait for the queued work to finish */
+       xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);   /* then force the log from the caller's context */
+}
+
 #define SYNCD_FLAGS    (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
+STATIC void
+vfs_sync_worker(
+       vfs_t           *vfsp,          /* mount to sync; xfssyncd installs this function as vfs_sync_work.w_syncer */
+       void            *unused)        /* w_data of the work item; ignored */
+{
+       int             error;          /* receives the VFS_SYNC result; never examined afterwards */
+
+       if (!(vfsp->vfs_flag & VFS_RDONLY))     /* read-only mounts skip the sync itself */
+               VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
+       vfsp->vfs_sync_seq++;   /* bumped on every pass, even when read-only; linvfs_sync_super samples this counter */
+       wmb();                  /* NOTE(review): presumably orders the counter update before the wakeup - confirm against the waiter */
+       wake_up(&vfsp->vfs_wait_single_sync_task);      /* wake anyone waiting on vfs_wait_single_sync_task */
+}
 
 STATIC int
 xfssyncd(
        void                    *arg)
 {
+       long                    timeleft;
        vfs_t                   *vfsp = (vfs_t *) arg;
-       int                     error;
+       struct list_head        tmp;
+       struct vfs_sync_work    *work, *n;
 
        daemonize("xfssyncd");
 
+       vfsp->vfs_sync_work.w_vfs = vfsp;
+       vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
        vfsp->vfs_sync_task = current;
        wmb();
        wake_up(&vfsp->vfs_wait_sync_task);
 
+       INIT_LIST_HEAD(&tmp);
+       timeleft = (xfs_syncd_centisecs * HZ) / 100;
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout((xfs_syncd_centisecs * HZ) / 100);
+               timeleft = schedule_timeout(timeleft);
                /* swsusp */
                if (current->flags & PF_FREEZE)
                        refrigerator(PF_FREEZE);
                if (vfsp->vfs_flag & VFS_UMOUNT)
                        break;
-               if (vfsp->vfs_flag & VFS_RDONLY)
-                       continue;
-               VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
 
-               vfsp->vfs_sync_seq++;
-               wmb();
-               wake_up(&vfsp->vfs_wait_single_sync_task);
+               spin_lock(&vfsp->vfs_sync_lock);
+               /*
+                * We can get woken by laptop mode, to do a sync -
+                * that's the (only!) case where the list would be
+                * empty with time remaining.
+                */
+               if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
+                       if (!timeleft)
+                               timeleft = (xfs_syncd_centisecs * HZ) / 100;
+                       INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
+                       list_add_tail(&vfsp->vfs_sync_work.w_list,
+                                       &vfsp->vfs_sync_list);
+               }
+               list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
+                       list_move(&work->w_list, &tmp);
+               spin_unlock(&vfsp->vfs_sync_lock);
+
+               list_for_each_entry_safe(work, n, &tmp, w_list) {
+                       (*work->w_syncer)(vfsp, work->w_data);
+                       list_del(&work->w_list);
+                       if (work == &vfsp->vfs_sync_work)
+                               continue;
+                       kmem_free(work, sizeof(struct vfs_sync_work));
+               }
        }
 
        vfsp->vfs_sync_task = NULL;
@@ -514,9 +609,10 @@ linvfs_sync_super(
 
        if (unlikely(laptop_mode)) {
                int     prev_sync_seq = vfsp->vfs_sync_seq;
+
                /*
                 * The disk must be active because we're syncing.
-                * We schedule syncd now (now that the disk is
+                * We schedule xfssyncd now (now that the disk is
                 * active) instead of later (when it might not be).
                 */
                wake_up_process(vfsp->vfs_sync_task);
@@ -576,31 +672,22 @@ linvfs_get_parent(
        int                     error;
        vnode_t                 *vp, *cvp;
        struct dentry           *parent;
-       struct inode            *ip = NULL;
        struct dentry           dotdot;
 
        dotdot.d_name.name = "..";
        dotdot.d_name.len = 2;
-       dotdot.d_inode = 0;
+       dotdot.d_inode = NULL;
 
        cvp = NULL;
        vp = LINVFS_GET_VP(child->d_inode);
        VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
-
-       if (!error) {
-               ASSERT(cvp);
-               ip = LINVFS_GET_IP(cvp);
-               if (!ip) {
-                       VN_RELE(cvp);
-                       return ERR_PTR(-EACCES);
-               }
-       }
-       if (error)
+       if (unlikely(error))
                return ERR_PTR(-error);
-       parent = d_alloc_anon(ip);
-       if (!parent) {
+
+       parent = d_alloc_anon(LINVFS_GET_IP(cvp));
+       if (unlikely(!parent)) {
                VN_RELE(cvp);
-               parent = ERR_PTR(-ENOMEM);
+               return ERR_PTR(-ENOMEM);
        }
        return parent;
 }
@@ -865,6 +952,10 @@ init_xfs_fs( void )
                goto undo_shaker;
        }
 
+       error = xfs_ioctl32_init();
+       if (error)
+               goto undo_ioctl32;
+
        error = register_filesystem(&xfs_fs_type);
        if (error)
                goto undo_register;
@@ -872,6 +963,9 @@ init_xfs_fs( void )
        return 0;
 
 undo_register:
+       xfs_ioctl32_exit();
+
+undo_ioctl32:
        kmem_shake_deregister(xfs_inode_shaker);
 
 undo_shaker:
@@ -890,6 +984,7 @@ exit_xfs_fs( void )
        vfs_exitquota();
        XFS_DM_EXIT(&xfs_fs_type);
        unregister_filesystem(&xfs_fs_type);
+       xfs_ioctl32_exit();
        kmem_shake_deregister(xfs_inode_shaker);
        xfs_cleanup();
        pagebuf_terminate();