#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_version.h"
+#include "xfs_ioctl32.h"
#include <linux/namei.h>
#include <linux/init.h>
STATIC struct quotactl_ops linvfs_qops;
STATIC struct super_operations linvfs_sops;
STATIC struct export_operations linvfs_export_ops;
-STATIC kmem_cache_t * linvfs_inode_cachep;
+STATIC kmem_zone_t *linvfs_inode_zone;
+STATIC kmem_shaker_t xfs_inode_shaker;
STATIC struct xfs_mount_args *
xfs_args_allocate(
vnode_t *vp = LINVFS_GET_VP(inode);
if (vp->v_type == VNON) {
- make_bad_inode(inode);
+ vn_mark_bad(vp);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = &linvfs_file_inode_operations;
inode->i_fop = &linvfs_file_operations;
bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
}
- vp->v_type = IFTOVT(ip->i_d.di_mode);
-
- /* Have we been called during the new inode create process,
- * in which case we are too early to fill in the Linux inode.
- */
- if (vp->v_type == VNON)
- return;
-
- xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
-
- /* For new inodes we need to set the ops vectors,
- * and unlock the inode.
+ /*
+ * We need to set the ops vectors, and unlock the inode, but if
+ * we have been called during the new inode create process, it is
+ * too early to fill in the Linux inode. We will get called a
+ * second time once the inode is properly set up, and then we can
+ * finish our work.
*/
- if (unlock && (inode->i_state & I_NEW)) {
+ if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
+ vp->v_type = IFTOVT(ip->i_d.di_mode);
+ xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
xfs_set_inodeops(inode);
+
+ /* compiler barrier: XFS_INEW must be clear before the inode is unlocked */
+ ip->i_flags &= ~XFS_INEW;
+ barrier();
+
unlock_new_inode(inode);
}
}
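For orientation, a minimal sketch (not part of this patch; example_iget and example_fill_inode are hypothetical) of the generic two-phase handshake the code above participates in: iget_locked() returns a locked inode with I_NEW set, the filesystem fills it in exactly once, and unlock_new_inode() wakes any waiters:

STATIC struct inode *
example_iget(struct super_block *sb, unsigned long ino)
{
	struct inode	*inode = iget_locked(sb, ino);

	if (inode && (inode->i_state & I_NEW)) {
		/* the one chance to initialize a freshly allocated inode */
		example_fill_inode(inode);	/* hypothetical fill routine */
		unlock_new_inode(inode);	/* clears I_NEW, wakes waiters */
	}
	return inode;
}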
-void
-xfs_flush_inode(
- xfs_inode_t *ip)
-{
- struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
-
- filemap_flush(inode->i_mapping);
-}
-
-void
-xfs_flush_device(
- xfs_inode_t *ip)
-{
- sync_blockdev(XFS_ITOV(ip)->v_vfsp->vfs_super->s_bdev);
- xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
int
xfs_blkdev_get(
xfs_mount_t *mp,
{
vnode_t *vp;
- vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep,
+ vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone,
kmem_flags_convert(KM_SLEEP));
if (!vp)
return NULL;
linvfs_destroy_inode(
struct inode *inode)
{
- kmem_cache_free(linvfs_inode_cachep, LINVFS_GET_VP(inode));
+ kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
+}
+
+/*
+ * Memory-pressure callback: shed clean inodes from both inode zones
+ * and report to the VM how many pages that freed.
+ */
+STATIC int
+xfs_inode_shake(
+ int priority,
+ unsigned int gfp_mask)
+{
+ int pages;
+
+ pages = kmem_zone_shrink(linvfs_inode_zone);
+ pages += kmem_zone_shrink(xfs_inode_zone);
+ return pages;
}
STATIC void
STATIC int
init_inodecache( void )
{
- linvfs_inode_cachep = kmem_cache_create("linvfs_icache",
- sizeof(vnode_t), 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+ linvfs_inode_zone = kmem_cache_create("linvfs_icache",
+ sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
init_once, NULL);
-
- if (linvfs_inode_cachep == NULL)
+ if (linvfs_inode_zone == NULL)
return -ENOMEM;
return 0;
}
STATIC void
destroy_inodecache( void )
{
- if (kmem_cache_destroy(linvfs_inode_cachep))
+ if (kmem_cache_destroy(linvfs_inode_zone))
printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
}
* at the point when it is unpinned after a log write,
* since this is when the inode itself becomes flushable.
*/
-STATIC void
+STATIC int
linvfs_write_inode(
struct inode *inode,
int sync)
{
vnode_t *vp = LINVFS_GET_VP(inode);
- int error, flags = FLUSH_INODE;
+ int error = 0, flags = FLUSH_INODE;
if (vp) {
vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
if (sync)
	flags |= FLUSH_SYNC;
VOP_IFLUSH(vp, flags, error);
}
+
+ return -error;
}
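A sketch of the sign convention at this boundary (hook name hypothetical): XFS and the VOP_* macros return positive errno values, while the Linux VFS expects zero or a negative errno, hence the single negation above:

STATIC int
example_vfs_hook(struct inode *inode)
{
	int	error = 0;	/* positive errno, XFS-style */

	/* an internal call may set error = EIO, ENOSPC, ... */
	return -error;		/* negate exactly once, at the VFS boundary */
}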
STATIC void
}
+/*
+ * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+ * Doing this has two advantages:
+ * - It saves on stack space, which is tight in certain situations
+ * - It can be used (with care) as a mechanism to avoid deadlocks.
+ * Flushing while allocating in a full filesystem requires both.
+ */
+STATIC void
+xfs_syncd_queue_work(
+ struct vfs *vfs,
+ void *data,
+ void (*syncer)(vfs_t *, void *))
+{
+ vfs_sync_work_t *work;
+
+ work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
+ INIT_LIST_HEAD(&work->w_list);
+ work->w_syncer = syncer;
+ work->w_data = data;
+ work->w_vfs = vfs;
+ spin_lock(&vfs->vfs_sync_lock);
+ list_add_tail(&work->w_list, &vfs->vfs_sync_list);
+ spin_unlock(&vfs->vfs_sync_lock);
+ wake_up_process(vfs->vfs_sync_task);
+}
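A usage sketch with a hypothetical syncer: the callback runs later in xfssyncd's context, with a full stack of its own, and anything passed through data must carry its own reference:

STATIC void
example_sync_work(vfs_t *vfs, void *data)
{
	/* runs in xfssyncd: safe to sleep, stack is not the caller's */
}

STATIC void
example_kick_syncd(vfs_t *vfs)
{
	/* NULL data: this worker needs no referenced object */
	xfs_syncd_queue_work(vfs, NULL, example_sync_work);
}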
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room...
+ */
+STATIC void
+xfs_flush_inode_work(
+ vfs_t *vfs,
+ void *inode)
+{
+ filemap_flush(((struct inode *)inode)->i_mapping);
+ iput((struct inode *)inode);
+}
+
+void
+xfs_flush_inode(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
+ struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
+
+ /* take a reference for the worker; xfs_flush_inode_work's iput drops it */
+ igrab(inode);
+ xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
+ delay(HZ/2); /* throttle the caller while the flush makes progress */
+}
+
+/*
+ * This is the "bigger hammer" version of xfs_flush_inode_work...
+ * (IOW, "If at first you don't succeed, use a Bigger Hammer").
+ */
+STATIC void
+xfs_flush_device_work(
+ vfs_t *vfs,
+ void *inode)
+{
+ sync_blockdev(vfs->vfs_super->s_bdev);
+ iput((struct inode *)inode);
+}
+
+void
+xfs_flush_device(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
+ struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
+
+ /* as in xfs_flush_inode: reference for the worker, then throttle */
+ igrab(inode);
+ xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
+ delay(HZ/2);
+ /* finally push all dirty log records out, synchronously */
+ xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
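A caller-side sketch of the ENOSPC retry these helpers enable (the real callers live in the write path; example_allocate is hypothetical): try the small hammer first, the big one only if space is still short:

STATIC int
example_alloc_retry(xfs_inode_t *ip)
{
	int	error = example_allocate(ip);	/* hypothetical */

	if (error == ENOSPC) {
		xfs_flush_inode(ip);		/* free reserved delalloc space */
		error = example_allocate(ip);
	}
	if (error == ENOSPC) {
		xfs_flush_device(ip);		/* the bigger hammer */
		error = example_allocate(ip);
	}
	return error;
}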
+
#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
+STATIC void
+vfs_sync_worker(
+ vfs_t *vfsp,
+ void *unused)
+{
+ int error;
+
+ if (!(vfsp->vfs_flag & VFS_RDONLY))
+ VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
+ vfsp->vfs_sync_seq++;
+ wmb();
+ wake_up(&vfsp->vfs_wait_single_sync_task);
+}
STATIC int
xfssyncd(
void *arg)
{
+ long timeleft;
vfs_t *vfsp = (vfs_t *) arg;
- int error;
+ struct list_head tmp;
+ struct vfs_sync_work *work, *n;
daemonize("xfssyncd");
+ vfsp->vfs_sync_work.w_vfs = vfsp;
+ vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
vfsp->vfs_sync_task = current;
wmb();
wake_up(&vfsp->vfs_wait_sync_task);
+ INIT_LIST_HEAD(&tmp);
+ timeleft = (xfs_syncd_centisecs * HZ) / 100;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout((xfs_syncd_centisecs * HZ) / 100);
+ /* schedule_timeout() returns the jiffies remaining if woken early */
+ timeleft = schedule_timeout(timeleft);
/* swsusp */
if (current->flags & PF_FREEZE)
refrigerator(PF_FREEZE);
if (vfsp->vfs_flag & VFS_UMOUNT)
break;
- if (vfsp->vfs_flag & VFS_RDONLY)
- continue;
- VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
- vfsp->vfs_sync_seq++;
- wmb();
- wake_up(&vfsp->vfs_wait_single_sync_task);
+ spin_lock(&vfsp->vfs_sync_lock);
+ /*
+ * We can get woken by laptop mode, to do a sync -
+ * that's the (only!) case where the list would be
+ * empty with time remaining.
+ */
+ if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
+ if (!timeleft)
+ timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
+ list_add_tail(&vfsp->vfs_sync_work.w_list,
+ &vfsp->vfs_sync_list);
+ }
+ list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
+ list_move(&work->w_list, &tmp);
+ spin_unlock(&vfsp->vfs_sync_lock);
+
+ list_for_each_entry_safe(work, n, &tmp, w_list) {
+ (*work->w_syncer)(vfsp, work->w_data);
+ list_del(&work->w_list);
+ /* the work item embedded in the vfs struct is permanent; never free it */
+ if (work == &vfsp->vfs_sync_work)
+ continue;
+ kmem_free(work, sizeof(struct vfs_sync_work));
+ }
}
vfsp->vfs_sync_task = NULL;
if (unlikely(laptop_mode)) {
int prev_sync_seq = vfsp->vfs_sync_seq;
+
/*
* The disk must be active because we're syncing.
- * We schedule syncd now (now that the disk is
+ * We schedule xfssyncd now (now that the disk is
* active) instead of later (when it might not be).
*/
wake_up_process(vfsp->vfs_sync_task);
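Only the wake side of the laptop-mode handshake appears in this hunk; a plausible sketch of how it completes, pairing with the vfs_sync_seq increment and wake_up() in vfs_sync_worker() above:

	if (unlikely(laptop_mode)) {
		int	prev_sync_seq = vfsp->vfs_sync_seq;

		wake_up_process(vfsp->vfs_sync_task);
		/* wait until vfs_sync_worker() has bumped the sequence */
		wait_event(vfsp->vfs_wait_single_sync_task,
				vfsp->vfs_sync_seq != prev_sync_seq);
	}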
int error;
vnode_t *vp, *cvp;
struct dentry *parent;
- struct inode *ip = NULL;
struct dentry dotdot;
dotdot.d_name.name = "..";
dotdot.d_name.len = 2;
- dotdot.d_inode = 0;
+ dotdot.d_inode = NULL;
cvp = NULL;
vp = LINVFS_GET_VP(child->d_inode);
VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
-
- if (!error) {
- ASSERT(cvp);
- ip = LINVFS_GET_IP(cvp);
- if (!ip) {
- VN_RELE(cvp);
- return ERR_PTR(-EACCES);
- }
- }
- if (error)
+ if (unlikely(error))
return ERR_PTR(-error);
- parent = d_alloc_anon(ip);
- if (!parent) {
+
+ parent = d_alloc_anon(LINVFS_GET_IP(cvp));
+ if (unlikely(!parent)) {
VN_RELE(cvp);
- parent = ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
return parent;
}
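For reference, a caller-side sketch of the ERR_PTR convention linvfs_get_parent() relies on (caller context hypothetical):

	struct dentry	*parent = linvfs_get_parent(child);

	if (IS_ERR(parent))
		return PTR_ERR(parent);	/* negative errno encoded in the pointer */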
vn_init();
xfs_init();
uuid_init();
- vfs_initdmapi();
vfs_initquota();
+ xfs_inode_shaker = kmem_shake_register(xfs_inode_shake);
+ if (!xfs_inode_shaker) {
+ error = -ENOMEM;
+ goto undo_shaker;
+ }
+
+ error = xfs_ioctl32_init();
+ if (error)
+ goto undo_ioctl32;
+
error = register_filesystem(&xfs_fs_type);
if (error)
goto undo_register;
+ XFS_DM_INIT(&xfs_fs_type);
return 0;
undo_register:
+ xfs_ioctl32_exit();
+
+undo_ioctl32:
+ kmem_shake_deregister(xfs_inode_shaker);
+
+undo_shaker:
pagebuf_terminate();
undo_pagebuf:
exit_xfs_fs( void )
{
vfs_exitquota();
- vfs_exitdmapi();
+ XFS_DM_EXIT(&xfs_fs_type);
unregister_filesystem(&xfs_fs_type);
+ xfs_ioctl32_exit();
+ kmem_shake_deregister(xfs_inode_shaker);
xfs_cleanup();
pagebuf_terminate();
destroy_inodecache();