X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_mount.c;h=9dfae18d995f5f5654c4230ec1de58530c73ba8c;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=927936e6d0237b04f4014b635592adaf19c07fa7;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 927936e6d..9dfae18d9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1,62 +1,45 @@ /* - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation. * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - #include "xfs.h" -#include "xfs_macros.h" +#include "xfs_fs.h" #include "xfs_types.h" -#include "xfs_inum.h" +#include "xfs_bit.h" #include "xfs_log.h" +#include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_alloc_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_bmap.h" #include "xfs_error.h" -#include "xfs_bit.h" #include "xfs_rw.h" #include "xfs_quota.h" #include "xfs_fsops.h" @@ -64,14 +47,34 @@ STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); STATIC int xfs_uuid_mount(xfs_mount_t *); STATIC void xfs_uuid_unmount(xfs_mount_t *mp); +STATIC void xfs_unmountfs_wait(xfs_mount_t *); + -void xfs_xlatesb(void *, xfs_sb_t *, int, xfs_arch_t, __int64_t); +#ifdef HAVE_PERCPU_SB +STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); +STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); +STATIC void xfs_icsb_sync_counters(xfs_mount_t *); +STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, + int, int); +STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t, + int, int); +STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); -static struct { - short offset; - short type; /* 0 = integer - * 1 = binary / string (no translation) - */ +#else + +#define xfs_icsb_destroy_counters(mp) do { } while (0) +#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) +#define xfs_icsb_sync_counters(mp) do { } while (0) +#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) +#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0) + +#endif + +static const struct { + short offset; + short type; /* 0 = integer + * 1 = binary / string (no translation) + */ } xfs_sb_info[] = { { offsetof(xfs_sb_t, sb_magicnum), 0 }, { offsetof(xfs_sb_t, sb_blocksize), 0 }, @@ -129,11 +132,15 @@ xfs_mount_init(void) { xfs_mount_t *mp; - mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); + mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); + + if (xfs_icsb_init_counters(mp)) { + mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; + } AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); spinlock_init(&mp->m_sb_lock, "xfs_sb"); - mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock"); + mutex_init(&mp->m_ilock); initnsema(&mp->m_growlock, 1, "xfs_grow"); /* * Initialize the AIL. @@ -152,8 +159,8 @@ xfs_mount_init(void) */ void xfs_mount_free( - xfs_mount_t *mp, - int remove_bhv) + xfs_mount_t *mp, + int remove_bhv) { if (mp->m_ihash) xfs_ihash_free(mp); @@ -181,14 +188,19 @@ xfs_mount_free( if (mp->m_fsname != NULL) kmem_free(mp->m_fsname, mp->m_fsname_len); + if (mp->m_rtname != NULL) + kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); + if (mp->m_logname != NULL) + kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); if (remove_bhv) { - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); bhv_remove_all_vfsops(vfsp, 0); VFS_REMOVEBHV(vfsp, &mp->m_bhv); } + xfs_icsb_destroy_counters(mp); kmem_free(mp, sizeof(xfs_mount_t)); } @@ -199,7 +211,8 @@ xfs_mount_free( STATIC int xfs_mount_validate_sb( xfs_mount_t *mp, - xfs_sb_t *sbp) + xfs_sb_t *sbp, + int flags) { /* * If the log device and data device have the @@ -209,33 +222,29 @@ xfs_mount_validate_sb( * a volume filesystem in a non-volume manner. */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { - cmn_err(CE_WARN, "XFS: bad magic number"); + xfs_fs_mount_cmn_err(flags, "bad magic number"); return XFS_ERROR(EWRONGFS); } if (!XFS_SB_GOOD_VERSION(sbp)) { - cmn_err(CE_WARN, "XFS: bad version"); + xfs_fs_mount_cmn_err(flags, "bad version"); return XFS_ERROR(EWRONGFS); } if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { - cmn_err(CE_WARN, - "XFS: filesystem is marked as having an external log; " - "specify logdev on the\nmount command line."); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)", - XFS_ERRLEVEL_HIGH, mp, sbp); - return XFS_ERROR(EFSCORRUPTED); + xfs_fs_mount_cmn_err(flags, + "filesystem is marked as having an external log; " + "specify logdev on the\nmount command line."); + return XFS_ERROR(EINVAL); } if (unlikely( sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { - cmn_err(CE_WARN, - "XFS: filesystem is marked as having an internal log; " - "don't specify logdev on\nthe mount command line."); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)", - XFS_ERRLEVEL_HIGH, mp, sbp); - return XFS_ERROR(EFSCORRUPTED); + xfs_fs_mount_cmn_err(flags, + "filesystem is marked as having an internal log; " + "do not specify logdev on\nthe mount command line."); + return XFS_ERROR(EINVAL); } /* @@ -254,12 +263,13 @@ xfs_mount_validate_sb( sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || + sbp->sb_inodelog < XFS_DINODE_MIN_LOG || + sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || - sbp->sb_imax_pct > 100)) { - cmn_err(CE_WARN, "XFS: SB sanity check 1 failed"); - XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)", - XFS_ERRLEVEL_LOW, mp, sbp); + (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { + xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -272,40 +282,50 @@ xfs_mount_validate_sb( (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { - cmn_err(CE_WARN, "XFS: SB sanity check 2 failed"); - XFS_ERROR_REPORT("xfs_mount_validate_sb(4)", - XFS_ERRLEVEL_LOW, mp); + xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); return XFS_ERROR(EFSCORRUPTED); } -#if !XFS_BIG_BLKNOS + ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); + ASSERT(sbp->sb_blocklog >= BBSHIFT); + +#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ if (unlikely( - (sbp->sb_dblocks << (__uint64_t)(sbp->sb_blocklog - BBSHIFT)) - > UINT_MAX || - (sbp->sb_rblocks << (__uint64_t)(sbp->sb_blocklog - BBSHIFT)) - > UINT_MAX)) { - cmn_err(CE_WARN, - "XFS: File system is too large to be mounted on this system."); + (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX || + (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) { +#else /* Limited by UINT_MAX of sectors */ + if (unlikely( + (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || + (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { +#endif + xfs_fs_mount_cmn_err(flags, + "file system too large to be mounted on this system."); return XFS_ERROR(E2BIG); } -#endif if (unlikely(sbp->sb_inprogress)) { - cmn_err(CE_WARN, "XFS: file system busy"); - XFS_ERROR_REPORT("xfs_mount_validate_sb(5)", - XFS_ERRLEVEL_LOW, mp); + xfs_fs_mount_cmn_err(flags, "file system busy"); return XFS_ERROR(EFSCORRUPTED); } + /* + * Version 1 directory format has never worked on Linux. + */ + if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { + xfs_fs_mount_cmn_err(flags, + "file system using version 1 directory format"); + return XFS_ERROR(ENOSYS); + } + /* * Until this is fixed only page-sized or smaller data blocks work. */ if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - cmn_err(CE_WARN, - "XFS: Attempted to mount file system with blocksize %d bytes", + xfs_fs_mount_cmn_err(flags, + "file system with blocksize %d bytes", sbp->sb_blocksize); - cmn_err(CE_WARN, - "XFS: Only page-sized (%d) or less blocksizes currently work.", + xfs_fs_mount_cmn_err(flags, + "only pagesize (%ld) or less will currently work.", PAGE_SIZE); return XFS_ERROR(ENOSYS); } @@ -313,10 +333,13 @@ xfs_mount_validate_sb( return 0; } -void -xfs_initialize_perag(xfs_mount_t *mp, int agcount) +xfs_agnumber_t +xfs_initialize_perag( + bhv_vfs_t *vfs, + xfs_mount_t *mp, + xfs_agnumber_t agcount) { - int index, max_metadata; + xfs_agnumber_t index, max_metadata; xfs_perag_t *pag; xfs_agino_t agino; xfs_ino_t ino; @@ -330,7 +353,7 @@ xfs_initialize_perag(xfs_mount_t *mp, int agcount) /* Clear the mount flag if no inode can overflow 32 bits * on this filesystem, or if specifically requested.. */ - if ((mp->m_flags & XFS_MOUNT_32BITINOOPT) && ino > max_inum) { + if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) { mp->m_flags |= XFS_MOUNT_32BITINODES; } else { mp->m_flags &= ~XFS_MOUNT_32BITINODES; @@ -347,7 +370,7 @@ xfs_initialize_perag(xfs_mount_t *mp, int agcount) icount = sbp->sb_dblocks * sbp->sb_imax_pct; do_div(icount, 100); icount += sbp->sb_agblocks - 1; - do_div(icount, mp->m_ialloc_blks); + do_div(icount, sbp->sb_agblocks); max_metadata = icount; } else { max_metadata = agcount; @@ -359,7 +382,7 @@ xfs_initialize_perag(xfs_mount_t *mp, int agcount) break; } - /* This ag is prefered for inodes */ + /* This ag is preferred for inodes */ pag = &mp->m_perag[index]; pag->pagi_inodeok = 1; if (index < max_metadata) @@ -372,7 +395,7 @@ xfs_initialize_perag(xfs_mount_t *mp, int agcount) pag->pagi_inodeok = 1; } } - mp->m_maxagi = index; + return index; } /* @@ -382,7 +405,6 @@ xfs_initialize_perag(xfs_mount_t *mp, int agcount) * sb - a superblock * dir - conversion direction: <0 - convert sb to buf * >0 - convert buf to sb - * arch - architecture to read/write from/to buf * fields - which fields to copy (bitmask) */ void @@ -390,7 +412,6 @@ xfs_xlatesb( void *data, xfs_sb_t *sb, int dir, - xfs_arch_t arch, __int64_t fields) { xfs_caddr_t buf_ptr; @@ -415,9 +436,7 @@ xfs_xlatesb( ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); - if (arch == ARCH_NOCONVERT || - size == 1 || - xfs_sb_info[f].type == 1) { + if (size == 1 || xfs_sb_info[f].type == 1) { if (dir > 0) { memcpy(mem_ptr + first, buf_ptr + first, size); } else { @@ -428,16 +447,16 @@ xfs_xlatesb( case 2: INT_XLATE(*(__uint16_t*)(buf_ptr+first), *(__uint16_t*)(mem_ptr+first), - dir, arch); + dir, ARCH_CONVERT); break; case 4: INT_XLATE(*(__uint32_t*)(buf_ptr+first), *(__uint32_t*)(mem_ptr+first), - dir, arch); + dir, ARCH_CONVERT); break; case 8: INT_XLATE(*(__uint64_t*)(buf_ptr+first), - *(__uint64_t*)(mem_ptr+first), dir, arch); + *(__uint64_t*)(mem_ptr+first), dir, ARCH_CONVERT); break; default: ASSERT(0); @@ -454,7 +473,7 @@ xfs_xlatesb( * Does the initial read of the superblock. */ int -xfs_readsb(xfs_mount_t *mp) +xfs_readsb(xfs_mount_t *mp, int flags) { unsigned int sector_size; unsigned int extra_flags; @@ -476,7 +495,7 @@ xfs_readsb(xfs_mount_t *mp) bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { - cmn_err(CE_WARN, "XFS: SB read failed"); + xfs_fs_mount_cmn_err(flags, "SB read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; goto fail; } @@ -488,12 +507,11 @@ xfs_readsb(xfs_mount_t *mp) * But first do some basic consistency checking. */ sbp = XFS_BUF_TO_SBP(bp); - xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, - ARCH_CONVERT, XFS_SB_ALL_BITS); + xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS); - error = xfs_mount_validate_sb(mp, &(mp->m_sb)); + error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { - cmn_err(CE_WARN, "XFS: SB validate failed"); + xfs_fs_mount_cmn_err(flags, "SB validate failed"); goto fail; } @@ -501,8 +519,8 @@ xfs_readsb(xfs_mount_t *mp) * We must be able to do sector-sized and sector-aligned IO. */ if (sector_size > mp->m_sb.sb_sectsize) { - cmn_err(CE_WARN, - "XFS: device supports only %u byte sectors (not %u)", + xfs_fs_mount_cmn_err(flags, + "device supports only %u byte sectors (not %u)", sector_size, mp->m_sb.sb_sectsize); error = ENOSYS; goto fail; @@ -519,7 +537,7 @@ xfs_readsb(xfs_mount_t *mp) bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); if (!bp || XFS_BUF_ISERROR(bp)) { - cmn_err(CE_WARN, "XFS: SB re-read failed"); + xfs_fs_mount_cmn_err(flags, "SB re-read failed"); error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; goto fail; } @@ -527,6 +545,10 @@ xfs_readsb(xfs_mount_t *mp) ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); } + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + mp->m_sb_bp = bp; xfs_buf_relse(bp); ASSERT(XFS_BUF_VALUSEMA(bp) > 0); @@ -548,7 +570,7 @@ xfs_readsb(xfs_mount_t *mp) * fields from the superblock associated with the given * mount structure */ -void +STATIC void xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) { int i; @@ -576,12 +598,13 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); switch (sbp->sb_inodesize) { case 256: - mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2); + mp->m_attroffset = XFS_LITINO(mp) - + XFS_BMDR_SPACE_CALC(MINABTPTRS); break; case 512: case 1024: case 2048: - mp->m_attroffset = XFS_BMDR_SPACE_CALC(12); + mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); break; default: ASSERT(0); @@ -626,16 +649,15 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) */ int xfs_mountfs( - vfs_t *vfsp, + bhv_vfs_t *vfsp, xfs_mount_t *mp, int mfsi_flags) { xfs_buf_t *bp; xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; - vnode_t *rvp = 0; + bhv_vnode_t *rvp = NULL; int readio_log, writeio_log; - vmap_t vmap; xfs_daddr_t d; __uint64_t ret64; __int64_t update_flags; @@ -645,8 +667,8 @@ xfs_mountfs( int error = 0; if (mp->m_sb_bp == NULL) { - if ((error = xfs_readsb(mp))) { - return (error); + if ((error = xfs_readsb(mp, mfsi_flags))) { + return error; } } xfs_mount_common(mp, sbp); @@ -682,14 +704,21 @@ xfs_mountfs( error = XFS_ERROR(EINVAL); goto error1; } + xfs_fs_cmn_err(CE_WARN, mp, +"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", + mp->m_dalign, mp->m_swidth, + sbp->sb_agblocks); + mp->m_dalign = 0; mp->m_swidth = 0; } else if (mp->m_dalign) { mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { if (mp->m_flags & XFS_MOUNT_RETERR) { - cmn_err(CE_WARN, - "XFS: alignment check 3 failed"); + xfs_fs_cmn_err(CE_WARN, mp, +"stripe alignment turned off: sunit(%d) less than bsize(%d)", + mp->m_dalign, + mp->m_blockmask +1); error = XFS_ERROR(EINVAL); goto error1; } @@ -882,7 +911,7 @@ xfs_mountfs( * For client case we are done now */ if (mfsi_flags & XFS_MFSI_CLIENT) { - return(0); + return 0; } /* @@ -903,18 +932,7 @@ xfs_mountfs( vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid; mp->m_dmevmask = 0; /* not persistent; set after each mount */ - /* - * Select the right directory manager. - */ - mp->m_dirops = - XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ? - xfsv2_dirops : - xfsv1_dirops; - - /* - * Initialize directory manager's entries. - */ - XFS_DIR_MOUNT(mp); + xfs_dir_mount(mp); /* * Initialize the attribute manager's entries. @@ -940,7 +958,7 @@ xfs_mountfs( mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); - xfs_initialize_perag(mp, sbp->sb_agcount); + mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount); /* * log's mount-time initialization. Perform 1st part recovery if needed @@ -964,7 +982,7 @@ xfs_mountfs( * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ - error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_ILOCK_EXCL, &rip, 0); + error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); if (error) { cmn_err(CE_WARN, "XFS: failed to read root inode"); goto error3; @@ -972,12 +990,12 @@ xfs_mountfs( ASSERT(rip != NULL); rvp = XFS_ITOV(rip); - VMAP(rvp, vmap); if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { cmn_err(CE_WARN, "XFS: corrupted root inode"); - prdev("Root inode %llu is not a directory", - mp->m_ddev_targp, (unsigned long long)rip->i_ino); + cmn_err(CE_WARN, "Device %s - root %llu is not a directory", + XFS_BUFTARG_NAME(mp->m_ddev_targp), + (unsigned long long)rip->i_ino); xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, mp); @@ -1026,7 +1044,7 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags))) + if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) goto error4; return 0; @@ -1036,7 +1054,6 @@ xfs_mountfs( * Free up the root inode. */ VN_RELE(rvp); - vn_purge(rvp, &vmap); error3: xfs_log_unmount_dealloc(mp); error2: @@ -1065,15 +1082,14 @@ xfs_mountfs( int xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) { - struct vfs *vfsp = XFS_MTOVFS(mp); + struct bhv_vfs *vfsp = XFS_MTOVFS(mp); #if defined(DEBUG) || defined(INDUCE_IO_ERROR) int64_t fsid; #endif - xfs_iflush_all(mp, XFS_FLUSH_ALL); + xfs_iflush_all(mp); - XFS_QM_DQPURGEALL(mp, - XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING); + XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); /* * Flush out the log synchronously so that we know for sure @@ -1089,6 +1105,8 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) xfs_unmountfs_writesb(mp); + xfs_unmountfs_wait(mp); /* wait for async bufs */ + xfs_log_unmount(mp); /* Done! No more fs ops. */ xfs_freesb(mp); @@ -1098,15 +1116,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) */ ASSERT(mp->m_inodes == NULL); - /* - * We may have bufs that are in the process of getting written still. - * We must wait for the I/O completion of those. The sync flag here - * does a two pass iteration thru the bufcache. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */ - } - xfs_unmountfs_close(mp, cr); if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) xfs_uuid_unmount(mp); @@ -1133,6 +1142,16 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) xfs_free_buftarg(mp->m_ddev_targp, 0); } +STATIC void +xfs_unmountfs_wait(xfs_mount_t *mp) +{ + if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_wait_buftarg(mp->m_logdev_targp); + if (mp->m_rtdev_targp) + xfs_wait_buftarg(mp->m_rtdev_targp); + xfs_wait_buftarg(mp->m_ddev_targp); +} + int xfs_unmountfs_writesb(xfs_mount_t *mp) { @@ -1147,6 +1166,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) sbp = xfs_getsb(mp, 0); if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || XFS_FORCED_SHUTDOWN(mp))) { + + xfs_icsb_sync_counters(mp); + /* * mark shared-readonly if desired */ @@ -1175,7 +1197,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); } xfs_buf_relse(sbp); - return (error); + return error; } /* @@ -1206,7 +1228,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) /* translate/copy */ - xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, ARCH_CONVERT, fields); + xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields); /* find modified range */ @@ -1221,6 +1243,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_trans_log_buf(tp, bp, first, last); } + /* * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply * a delta to a specified field in the in-core superblock. Simply @@ -1230,7 +1253,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) * * The SB_LOCK must be held when this routine is called. */ -STATIC int +int xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) { @@ -1250,22 +1273,23 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, lcounter += delta; if (lcounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_icount = lcounter; - return (0); + return 0; case XFS_SBS_IFREE: lcounter = (long long)mp->m_sb.sb_ifree; lcounter += delta; if (lcounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_ifree = lcounter; - return (0); + return 0; case XFS_SBS_FDBLOCKS: - lcounter = (long long)mp->m_sb.sb_fdblocks; + lcounter = (long long) + mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); if (delta > 0) { /* Putting blocks back */ @@ -1289,101 +1313,101 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, if (rsvd) { lcounter = (long long)mp->m_resblks_avail + delta; if (lcounter < 0) { - return (XFS_ERROR(ENOSPC)); + return XFS_ERROR(ENOSPC); } mp->m_resblks_avail = lcounter; - return (0); + return 0; } else { /* not reserved */ - return (XFS_ERROR(ENOSPC)); + return XFS_ERROR(ENOSPC); } } } - mp->m_sb.sb_fdblocks = lcounter; - return (0); + mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); + return 0; case XFS_SBS_FREXTENTS: lcounter = (long long)mp->m_sb.sb_frextents; lcounter += delta; if (lcounter < 0) { - return (XFS_ERROR(ENOSPC)); + return XFS_ERROR(ENOSPC); } mp->m_sb.sb_frextents = lcounter; - return (0); + return 0; case XFS_SBS_DBLOCKS: lcounter = (long long)mp->m_sb.sb_dblocks; lcounter += delta; if (lcounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_dblocks = lcounter; - return (0); + return 0; case XFS_SBS_AGCOUNT: scounter = mp->m_sb.sb_agcount; scounter += delta; if (scounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_agcount = scounter; - return (0); + return 0; case XFS_SBS_IMAX_PCT: scounter = mp->m_sb.sb_imax_pct; scounter += delta; if (scounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_imax_pct = scounter; - return (0); + return 0; case XFS_SBS_REXTSIZE: scounter = mp->m_sb.sb_rextsize; scounter += delta; if (scounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_rextsize = scounter; - return (0); + return 0; case XFS_SBS_RBMBLOCKS: scounter = mp->m_sb.sb_rbmblocks; scounter += delta; if (scounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_rbmblocks = scounter; - return (0); + return 0; case XFS_SBS_RBLOCKS: lcounter = (long long)mp->m_sb.sb_rblocks; lcounter += delta; if (lcounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_rblocks = lcounter; - return (0); + return 0; case XFS_SBS_REXTENTS: lcounter = (long long)mp->m_sb.sb_rextents; lcounter += delta; if (lcounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_rextents = lcounter; - return (0); + return 0; case XFS_SBS_REXTSLOG: scounter = mp->m_sb.sb_rextslog; scounter += delta; if (scounter < 0) { ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } mp->m_sb.sb_rextslog = scounter; - return (0); + return 0; default: ASSERT(0); - return (XFS_ERROR(EINVAL)); + return XFS_ERROR(EINVAL); } } @@ -1399,10 +1423,27 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) unsigned long s; int status; - s = XFS_SB_LOCK(mp); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - XFS_SB_UNLOCK(mp, s); - return (status); + /* check for per-cpu counters */ + switch (field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = xfs_icsb_modify_counters(mp, field, + delta, rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + s = XFS_SB_LOCK(mp); + status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + XFS_SB_UNLOCK(mp, s); + break; + } + + return status; } /* @@ -1438,8 +1479,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) * from the loop so we'll fall into the undo loop * below. */ - status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, - msbp->msb_delta, rsvd); + switch (msbp->msb_field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = xfs_icsb_modify_counters_locked(mp, + msbp->msb_field, + msbp->msb_delta, rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + status = xfs_mod_incore_sb_unlocked(mp, + msbp->msb_field, + msbp->msb_delta, rsvd); + break; + } + if (status != 0) { break; } @@ -1456,14 +1515,34 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) if (status != 0) { msbp--; while (msbp >= msb) { - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, -(msbp->msb_delta), rsvd); + switch (msbp->msb_field) { +#ifdef HAVE_PERCPU_SB + case XFS_SBS_ICOUNT: + case XFS_SBS_IFREE: + case XFS_SBS_FDBLOCKS: + if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { + status = + xfs_icsb_modify_counters_locked(mp, + msbp->msb_field, + -(msbp->msb_delta), + rsvd); + break; + } + /* FALLTHROUGH */ +#endif + default: + status = xfs_mod_incore_sb_unlocked(mp, + msbp->msb_field, + -(msbp->msb_delta), + rsvd); + break; + } ASSERT(status == 0); msbp--; } } XFS_SB_UNLOCK(mp, s); - return (status); + return status; } /* @@ -1493,7 +1572,7 @@ xfs_getsb( } XFS_BUF_HOLD(bp); ASSERT(XFS_BUF_ISDONE(bp)); - return (bp); + return bp; } /* @@ -1570,3 +1649,529 @@ xfs_mount_log_sbunit( xfs_mod_sb(tp, fields); xfs_trans_commit(tp, 0, NULL); } + + +#ifdef HAVE_PERCPU_SB +/* + * Per-cpu incore superblock counters + * + * Simple concept, difficult implementation + * + * Basically, replace the incore superblock counters with a distributed per cpu + * counter for contended fields (e.g. free block count). + * + * Difficulties arise in that the incore sb is used for ENOSPC checking, and + * hence needs to be accurately read when we are running low on space. Hence + * there is a method to enable and disable the per-cpu counters based on how + * much "stuff" is available in them. + * + * Basically, a counter is enabled if there is enough free resource to justify + * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local + * ENOSPC), then we disable the counters to synchronise all callers and + * re-distribute the available resources. + * + * If, once we redistributed the available resources, we still get a failure, + * we disable the per-cpu counter and go through the slow path. + * + * The slow path is the current xfs_mod_incore_sb() function. This means that + * when we disable a per-cpu counter, we need to drain it's resources back to + * the global superblock. We do this after disabling the counter to prevent + * more threads from queueing up on the counter. + * + * Essentially, this means that we still need a lock in the fast path to enable + * synchronisation between the global counters and the per-cpu counters. This + * is not a problem because the lock will be local to a CPU almost all the time + * and have little contention except when we get to ENOSPC conditions. + * + * Basically, this lock becomes a barrier that enables us to lock out the fast + * path while we do things like enabling and disabling counters and + * synchronising the counters. + * + * Locking rules: + * + * 1. XFS_SB_LOCK() before picking up per-cpu locks + * 2. per-cpu locks always picked up via for_each_online_cpu() order + * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks + * 4. modifying per-cpu counters requires holding per-cpu lock + * 5. modifying global counters requires holding XFS_SB_LOCK + * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK + * and _none_ of the per-cpu locks. + * + * Disabled counters are only ever re-enabled by a balance operation + * that results in more free resources per CPU than a given threshold. + * To ensure counters don't remain disabled, they are rebalanced when + * the global resource goes above a higher threshold (i.e. some hysteresis + * is present to prevent thrashing). + */ + +#ifdef CONFIG_HOTPLUG_CPU +/* + * hot-plug CPU notifier support. + * + * We need a notifier per filesystem as we need to be able to identify + * the filesystem to balance the counters out. This is achieved by + * having a notifier block embedded in the xfs_mount_t and doing pointer + * magic to get the mount pointer from the notifier block address. + */ +STATIC int +xfs_icsb_cpu_notify( + struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + xfs_icsb_cnts_t *cntp; + xfs_mount_t *mp; + int s; + + mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); + cntp = (xfs_icsb_cnts_t *) + per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); + switch (action) { + case CPU_UP_PREPARE: + /* Easy Case - initialize the area and locks, and + * then rebalance when online does everything else for us. */ + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); + break; + case CPU_ONLINE: + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); + break; + case CPU_DEAD: + /* Disable all the counters, then fold the dead cpu's + * count into the total on the global superblock and + * re-enable the counters. */ + s = XFS_SB_LOCK(mp); + xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); + xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); + xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); + + mp->m_sb.sb_icount += cntp->icsb_icount; + mp->m_sb.sb_ifree += cntp->icsb_ifree; + mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; + + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); + + xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); + xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); + xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED); + XFS_SB_UNLOCK(mp, s); + break; + } + + return NOTIFY_OK; +} +#endif /* CONFIG_HOTPLUG_CPU */ + +int +xfs_icsb_init_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); + if (mp->m_sb_cnts == NULL) + return -ENOMEM; + +#ifdef CONFIG_HOTPLUG_CPU + mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; + mp->m_icsb_notifier.priority = 0; + register_hotcpu_notifier(&mp->m_icsb_notifier); +#endif /* CONFIG_HOTPLUG_CPU */ + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); + } + /* + * start with all counters disabled so that the + * initial balance kicks us off correctly + */ + mp->m_icsb_counters = -1; + return 0; +} + +STATIC void +xfs_icsb_destroy_counters( + xfs_mount_t *mp) +{ + if (mp->m_sb_cnts) { + unregister_hotcpu_notifier(&mp->m_icsb_notifier); + free_percpu(mp->m_sb_cnts); + } +} + +STATIC inline void +xfs_icsb_lock_cntr( + xfs_icsb_cnts_t *icsbp) +{ + while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { + ndelay(1000); + } +} + +STATIC inline void +xfs_icsb_unlock_cntr( + xfs_icsb_cnts_t *icsbp) +{ + clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); +} + + +STATIC inline void +xfs_icsb_lock_all_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + xfs_icsb_lock_cntr(cntp); + } +} + +STATIC inline void +xfs_icsb_unlock_all_counters( + xfs_mount_t *mp) +{ + xfs_icsb_cnts_t *cntp; + int i; + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + xfs_icsb_unlock_cntr(cntp); + } +} + +STATIC void +xfs_icsb_count( + xfs_mount_t *mp, + xfs_icsb_cnts_t *cnt, + int flags) +{ + xfs_icsb_cnts_t *cntp; + int i; + + memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); + + if (!(flags & XFS_ICSB_LAZY_COUNT)) + xfs_icsb_lock_all_counters(mp); + + for_each_online_cpu(i) { + cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); + cnt->icsb_icount += cntp->icsb_icount; + cnt->icsb_ifree += cntp->icsb_ifree; + cnt->icsb_fdblocks += cntp->icsb_fdblocks; + } + + if (!(flags & XFS_ICSB_LAZY_COUNT)) + xfs_icsb_unlock_all_counters(mp); +} + +STATIC int +xfs_icsb_counter_disabled( + xfs_mount_t *mp, + xfs_sb_field_t field) +{ + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + return test_bit(field, &mp->m_icsb_counters); +} + +STATIC int +xfs_icsb_disable_counter( + xfs_mount_t *mp, + xfs_sb_field_t field) +{ + xfs_icsb_cnts_t cnt; + + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + + xfs_icsb_lock_all_counters(mp); + if (!test_and_set_bit(field, &mp->m_icsb_counters)) { + /* drain back to superblock */ + + xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); + switch(field) { + case XFS_SBS_ICOUNT: + mp->m_sb.sb_icount = cnt.icsb_icount; + break; + case XFS_SBS_IFREE: + mp->m_sb.sb_ifree = cnt.icsb_ifree; + break; + case XFS_SBS_FDBLOCKS: + mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; + break; + default: + BUG(); + } + } + + xfs_icsb_unlock_all_counters(mp); + + return 0; +} + +STATIC void +xfs_icsb_enable_counter( + xfs_mount_t *mp, + xfs_sb_field_t field, + uint64_t count, + uint64_t resid) +{ + xfs_icsb_cnts_t *cntp; + int i; + + ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); + + xfs_icsb_lock_all_counters(mp); + for_each_online_cpu(i) { + cntp = per_cpu_ptr(mp->m_sb_cnts, i); + switch (field) { + case XFS_SBS_ICOUNT: + cntp->icsb_icount = count + resid; + break; + case XFS_SBS_IFREE: + cntp->icsb_ifree = count + resid; + break; + case XFS_SBS_FDBLOCKS: + cntp->icsb_fdblocks = count + resid; + break; + default: + BUG(); + break; + } + resid = 0; + } + clear_bit(field, &mp->m_icsb_counters); + xfs_icsb_unlock_all_counters(mp); +} + +STATIC void +xfs_icsb_sync_counters_int( + xfs_mount_t *mp, + int flags) +{ + xfs_icsb_cnts_t cnt; + int s; + + /* Pass 1: lock all counters */ + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + s = XFS_SB_LOCK(mp); + + xfs_icsb_count(mp, &cnt, flags); + + /* Step 3: update mp->m_sb fields */ + if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) + mp->m_sb.sb_icount = cnt.icsb_icount; + if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) + mp->m_sb.sb_ifree = cnt.icsb_ifree; + if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) + mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; + + if ((flags & XFS_ICSB_SB_LOCKED) == 0) + XFS_SB_UNLOCK(mp, s); +} + +/* + * Accurate update of per-cpu counters to incore superblock + */ +STATIC void +xfs_icsb_sync_counters( + xfs_mount_t *mp) +{ + xfs_icsb_sync_counters_int(mp, 0); +} + +/* + * lazy addition used for things like df, background sb syncs, etc + */ +void +xfs_icsb_sync_counters_lazy( + xfs_mount_t *mp) +{ + xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT); +} + +/* + * Balance and enable/disable counters as necessary. + * + * Thresholds for re-enabling counters are somewhat magic. + * inode counts are chosen to be the same number as single + * on disk allocation chunk per CPU, and free blocks is + * something far enough zero that we aren't going thrash + * when we get near ENOSPC. + */ +#define XFS_ICSB_INO_CNTR_REENABLE 64 +#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ + (512 + XFS_ALLOC_SET_ASIDE(mp)) +STATIC void +xfs_icsb_balance_counter( + xfs_mount_t *mp, + xfs_sb_field_t field, + int flags) +{ + uint64_t count, resid; + int weight = num_online_cpus(); + int s; + + if (!(flags & XFS_ICSB_SB_LOCKED)) + s = XFS_SB_LOCK(mp); + + /* disable counter and sync counter */ + xfs_icsb_disable_counter(mp, field); + + /* update counters - first CPU gets residual*/ + switch (field) { + case XFS_SBS_ICOUNT: + count = mp->m_sb.sb_icount; + resid = do_div(count, weight); + if (count < XFS_ICSB_INO_CNTR_REENABLE) + goto out; + break; + case XFS_SBS_IFREE: + count = mp->m_sb.sb_ifree; + resid = do_div(count, weight); + if (count < XFS_ICSB_INO_CNTR_REENABLE) + goto out; + break; + case XFS_SBS_FDBLOCKS: + count = mp->m_sb.sb_fdblocks; + resid = do_div(count, weight); + if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp)) + goto out; + break; + default: + BUG(); + count = resid = 0; /* quiet, gcc */ + break; + } + + xfs_icsb_enable_counter(mp, field, count, resid); +out: + if (!(flags & XFS_ICSB_SB_LOCKED)) + XFS_SB_UNLOCK(mp, s); +} + +STATIC int +xfs_icsb_modify_counters_int( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd, + int flags) +{ + xfs_icsb_cnts_t *icsbp; + long long lcounter; /* long counter for 64 bit fields */ + int cpu, s, locked = 0; + int ret = 0, balance_done = 0; + +again: + cpu = get_cpu(); + icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), + xfs_icsb_lock_cntr(icsbp); + if (unlikely(xfs_icsb_counter_disabled(mp, field))) + goto slow_path; + + switch (field) { + case XFS_SBS_ICOUNT: + lcounter = icsbp->icsb_icount; + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_icount = lcounter; + break; + + case XFS_SBS_IFREE: + lcounter = icsbp->icsb_ifree; + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_ifree = lcounter; + break; + + case XFS_SBS_FDBLOCKS: + BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); + + lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + lcounter += delta; + if (unlikely(lcounter < 0)) + goto slow_path; + icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); + break; + default: + BUG(); + break; + } + xfs_icsb_unlock_cntr(icsbp); + put_cpu(); + if (locked) + XFS_SB_UNLOCK(mp, s); + return 0; + + /* + * The slow path needs to be run with the SBLOCK + * held so that we prevent other threads from + * attempting to run this path at the same time. + * this provides exclusion for the balancing code, + * and exclusive fallback if the balance does not + * provide enough resources to continue in an unlocked + * manner. + */ +slow_path: + xfs_icsb_unlock_cntr(icsbp); + put_cpu(); + + /* need to hold superblock incase we need + * to disable a counter */ + if (!(flags & XFS_ICSB_SB_LOCKED)) { + s = XFS_SB_LOCK(mp); + locked = 1; + flags |= XFS_ICSB_SB_LOCKED; + } + if (!balance_done) { + xfs_icsb_balance_counter(mp, field, flags); + balance_done = 1; + goto again; + } else { + /* + * we might not have enough on this local + * cpu to allocate for a bulk request. + * We need to drain this field from all CPUs + * and disable the counter fastpath + */ + xfs_icsb_disable_counter(mp, field); + } + + ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + + if (locked) + XFS_SB_UNLOCK(mp, s); + return ret; +} + +STATIC int +xfs_icsb_modify_counters( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd) +{ + return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0); +} + +/* + * Called when superblock is already locked + */ +STATIC int +xfs_icsb_modify_counters_locked( + xfs_mount_t *mp, + xfs_sb_field_t field, + int delta, + int rsvd) +{ + return xfs_icsb_modify_counters_int(mp, field, delta, + rsvd, XFS_ICSB_SB_LOCKED); +} +#endif