X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_vnodeops.c;h=2344664a834a2ce3b82ff9aa82da400b351215ad;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=2777aca04bc2631be2b30b2bb1dfe1aa7d208085;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 2777aca04..2344664a8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,101 +1,66 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation. * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_macros.h" +#include "xfs_fs.h" #include "xfs_types.h" -#include "xfs_inum.h" +#include "xfs_bit.h" #include "xfs_log.h" +#include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_alloc_btree.h" +#include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_itable.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" -#include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode_item.h" -#include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_rw.h" -#include "xfs_refcache.h" #include "xfs_error.h" -#include "xfs_bit.h" -#include "xfs_rtalloc.h" #include "xfs_quota.h" #include "xfs_utils.h" +#include "xfs_rtalloc.h" +#include "xfs_refcache.h" #include "xfs_trans_space.h" -#include "xfs_dir_leaf.h" -#include "xfs_mac.h" #include "xfs_log_priv.h" +#include "xfs_mac.h" - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 - -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ STATIC int xfs_open( bhv_desc_t *bdp, cred_t *credp) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -104,7 +69,7 @@ xfs_open( * If it's a directory with any blocks, read-ahead block 0 * as we're almost certain to have the next operation be a read there. */ - if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) { + if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { mode = xfs_ilock_map_shared(ip); if (ip->i_d.di_nextents > 0) (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); @@ -113,6 +78,35 @@ xfs_open( return 0; } +STATIC int +xfs_close( + bhv_desc_t *bdp, + int flags, + lastclose_t lastclose, + cred_t *credp) +{ + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return XFS_ERROR(EIO); + + if (lastclose != L_TRUE || !VN_ISREG(vp)) + return 0; + + /* + * If we previously truncated this file and removed old data in + * the process, we want to initiate "early" writeout on the last + * close. This is an attempt to combat the notorious NULL files + * problem which is particularly noticable from a truncate down, + * buffered (re-)write (delalloc), followed by a crash. What we + * are effectively doing here is significantly reducing the time + * window where we'd otherwise be exposed to that problem. + */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + return bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); + return 0; +} /* * xfs_getattr @@ -120,13 +114,13 @@ xfs_open( STATIC int xfs_getattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { xfs_inode_t *ip; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -163,55 +157,26 @@ xfs_getattr( /* * Copy from in-core inode. */ - vap->va_type = vp->v_type; - vap->va_mode = ip->i_d.di_mode & MODEMASK; + vap->va_mode = ip->i_d.di_mode; vap->va_uid = ip->i_d.di_uid; vap->va_gid = ip->i_d.di_gid; + vap->va_tag = ip->i_d.di_tag; vap->va_projid = ip->i_d.di_projid; /* * Check vnode type block/char vs. everything else. - * Do it with bitmask because that's faster than looking - * for multiple values individually. */ - if (((1 << vp->v_type) & ((1<i_d.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + vap->va_rdev = ip->i_df.if_u2.if_rdev; + vap->va_blocksize = BLKDEV_IOSIZE; + break; + default: vap->va_rdev = 0; if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { - -#if 0 - /* Large block sizes confuse various - * user space programs, so letting the - * stripe size through is not a good - * idea for now. - */ - vap->va_blocksize = mp->m_swidth ? - /* - * If the underlying volume is a stripe, then - * return the stripe width in bytes as the - * recommended I/O size. - */ - (mp->m_swidth << mp->m_sb.sb_blocklog) : - /* - * Return the largest of the preferred buffer - * sizes since doing small I/Os into larger - * buffers causes buffers to be decommissioned. - * The value returned is in bytes. - */ - (1 << (int)MAX(mp->m_readio_log, - mp->m_writeio_log)); - -#else - vap->va_blocksize = - /* - * Return the largest of the preferred buffer - * sizes since doing small I/Os into larger - * buffers causes buffers to be decommissioned. - * The value returned is in bytes. - */ - 1 << (int)MAX(mp->m_readio_log, - mp->m_writeio_log); -#endif + vap->va_blocksize = xfs_preferred_iosize(mp); } else { /* @@ -224,13 +189,10 @@ xfs_getattr( (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); } - } else { - vap->va_rdev = ip->i_df.if_u2.if_rdev; - vap->va_blocksize = BLKDEV_IOSIZE; + break; } - vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; - vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + vn_atime_to_timespec(vp, &vap->va_atime); vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; @@ -248,7 +210,7 @@ xfs_getattr( /* * Convert di_flags to xflags. */ - vap->va_xflags = xfs_dic2xflags(&ip->i_d, ARCH_NOCONVERT); + vap->va_xflags = xfs_ip2xflags(ip); /* * Exit for inode revalidate. See if any of the rest of @@ -283,10 +245,10 @@ xfs_getattr( /* * xfs_setattr */ -STATIC int +int xfs_setattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { @@ -299,12 +261,14 @@ xfs_setattr( uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; + tag_t tag=0, itag=0; int timeflags = 0; - vnode_t *vp; + bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int file_owner; + int need_iolock = 1; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -350,21 +314,29 @@ xfs_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) { + if (XFS_IS_QUOTA_ON(mp) && + (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { uint qflags = 0; - if (mask & XFS_AT_UID) { + /* TODO: handle tagging? */ + if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = vap->va_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = ip->i_d.di_uid; } - if (mask & XFS_AT_GID) { + if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = vap->va_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = ip->i_d.di_gid; } + if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { + projid = vap->va_projid; + qflags |= XFS_QMOPT_PQUOTA; + } else { + projid = ip->i_d.di_projid; + } /* * We take a reference when we initialize udqp and gdqp, * so it is important that we never blindly double trip on @@ -372,9 +344,10 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp); + code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, + &udqp, &gdqp); if (code) - return (code); + return code; } /* @@ -383,6 +356,8 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; + if (flags & ATTR_NOLOCK) + need_iolock = 0; if (!(mask & XFS_AT_SIZE)) { if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || (mp->m_flags & XFS_MOUNT_WSYNC)) { @@ -406,16 +381,12 @@ xfs_setattr( goto error_return; } } - lock_flags |= XFS_IOLOCK_EXCL; + if (need_iolock) + lock_flags |= XFS_IOLOCK_EXCL; } xfs_ilock(ip, lock_flags); - if (_MAC_XFS_IACCESS(ip, MACWRITE, credp)) { - code = XFS_ERROR(EACCES); - goto error_return; - } - /* boolean: are we the file owner? */ file_owner = (current_fsuid(credp) == ip->i_d.di_uid); @@ -427,6 +398,8 @@ xfs_setattr( if (mask & (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| XFS_AT_GID|XFS_AT_PROJID)) { + /* TODO: handle tagging? */ + /* * CAP_FOWNER overrides the following restrictions: * @@ -460,7 +433,7 @@ xfs_setattr( m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. */ - if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR) + if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) @@ -475,7 +448,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -483,10 +456,12 @@ xfs_setattr( * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + itag = ip->i_d.di_tag; + iprojid = ip->i_d.di_projid; uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; + gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; + tag = (mask & XFS_AT_TAG) ? vap->va_tag : itag; projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : iprojid; @@ -499,8 +474,6 @@ xfs_setattr( * that the group ID supplied to the chown() function * shall be equal to either the group ID or one of the * supplementary group IDs of the calling process. - * - * XXX: How does restricted_chown affect projid? */ if (restricted_chown && (iuid != uid || (igid != gid && @@ -510,11 +483,13 @@ xfs_setattr( goto error_return; } /* - * Do a quota reservation only if uid or gid is actually + * Do a quota reservation only if uid/projid/gid is actually * going to change. */ if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { + /* TODO: handle tagging? */ ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -539,10 +514,10 @@ xfs_setattr( goto error_return; } - if (vp->v_type == VDIR) { + if (VN_ISDIR(vp)) { code = XFS_ERROR(EISDIR); goto error_return; - } else if (vp->v_type != VREG) { + } else if (!VN_ISREG(vp)) { code = XFS_ERROR(EINVAL); goto error_return; } @@ -573,36 +548,18 @@ xfs_setattr( /* * Can't change extent size if any extents are allocated. */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (mask & XFS_AT_EXTSIZE) && + if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != vap->va_extsize) ) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; } - /* - * Can't set extent size unless the file is marked, or - * about to be marked as a realtime file. - * - * This check will be removed when fixed size extents - * with buffered data writes is implemented. - * - */ - if ((mask & XFS_AT_EXTSIZE) && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - vap->va_extsize) && - (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || - ((mask & XFS_AT_XFLAGS) && - (vap->va_xflags & XFS_XFLAG_REALTIME))))) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - /* * Can't change realtime flag if any extents are allocated. */ - if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) && + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + (mask & XFS_AT_XFLAGS) && (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != (vap->va_xflags & XFS_XFLAG_REALTIME)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ @@ -666,9 +623,12 @@ xfs_setattr( */ if (mask & XFS_AT_SIZE) { code = 0; - if (vap->va_size > ip->i_d.di_size) + if ((vap->va_size > ip->i_d.di_size) && + (flags & ATTR_NOSIZETOK) == 0) { code = xfs_igrow_start(ip, vap->va_size, credp); + } xfs_iunlock(ip, XFS_ILOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -683,7 +643,8 @@ xfs_setattr( XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return code; } commit_flags = XFS_TRANS_RELEASE_LOG_RES; @@ -718,9 +679,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + VTRUNCATE(vp); } /* * Have to do this even if the file's size doesn't change. @@ -746,7 +715,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -762,6 +731,9 @@ xfs_setattr( * Change the ownerships and register quota modifications * in the transaction. */ + if (itag != tag) { + ip->i_d.di_tag = tag; + } if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & XFS_AT_UID); @@ -773,6 +745,7 @@ xfs_setattr( } if (igid != gid) { if (XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & XFS_AT_GID); ASSERT(gdqp); olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, @@ -781,6 +754,13 @@ xfs_setattr( ip->i_d.di_gid = gid; } if (iprojid != projid) { + if (XFS_IS_PQUOTA_ON(mp)) { + ASSERT(!XFS_IS_GQUOTA_ON(mp)); + ASSERT(mask & XFS_AT_PROJID); + ASSERT(gdqp); + olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, + &ip->i_gdquot, gdqp); + } ip->i_d.di_projid = projid; /* * We may have to rev the inode as well as @@ -828,26 +808,46 @@ xfs_setattr( mp->m_sb.sb_blocklog; } if (mask & XFS_AT_XFLAGS) { - ip->i_d.di_flags = 0; - if (vap->va_xflags & XFS_XFLAG_REALTIME) { - ip->i_d.di_flags |= XFS_DIFLAG_REALTIME; - ip->i_iocore.io_flags |= XFS_IOCORE_RT; - } + uint di_flags; + + /* can't set PREALLOC this way, just preserve it */ + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) - ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE; + di_flags |= XFS_DIFLAG_IMMUTABLE; if (vap->va_xflags & XFS_XFLAG_IUNLINK) - ip->i_d.di_flags |= XFS_DIFLAG_IUNLINK; + di_flags |= XFS_DIFLAG_IUNLINK; if (vap->va_xflags & XFS_XFLAG_BARRIER) - ip->i_d.di_flags |= XFS_DIFLAG_BARRIER; + di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) - ip->i_d.di_flags |= XFS_DIFLAG_APPEND; + di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) - ip->i_d.di_flags |= XFS_DIFLAG_SYNC; + di_flags |= XFS_DIFLAG_SYNC; if (vap->va_xflags & XFS_XFLAG_NOATIME) - ip->i_d.di_flags |= XFS_DIFLAG_NOATIME; + di_flags |= XFS_DIFLAG_NOATIME; if (vap->va_xflags & XFS_XFLAG_NODUMP) - ip->i_d.di_flags |= XFS_DIFLAG_NODUMP; - /* can't set PREALLOC this way, just ignore it */ + di_flags |= XFS_DIFLAG_NODUMP; + if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + if (vap->va_xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { + if (vap->va_xflags & XFS_XFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { + if (vap->va_xflags & XFS_XFLAG_REALTIME) { + di_flags |= XFS_DIFLAG_REALTIME; + ip->i_iocore.io_flags |= XFS_IOCORE_RT; + } else { + ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; + } + if (vap->va_xflags & XFS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; + } + ip->i_d.di_flags = di_flags; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); timeflags |= XFS_ICHGTIME_CHG; @@ -878,7 +878,7 @@ xfs_setattr( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesytems. + * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis @@ -899,7 +899,7 @@ xfs_setattr( */ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); if (mandlock_before != mandlock_after) { - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, mandlock_after); } @@ -965,6 +965,13 @@ xfs_access( } +/* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. + */ +#define SYMLINK_MAPS 2 + /* * xfs_readlink * @@ -980,7 +987,7 @@ xfs_readlink( int count; xfs_off_t offset; int pathlen; - vnode_t *vp; + bhv_vnode_t *vp; int error = 0; xfs_mount_t *mp; int nmaps; @@ -1015,17 +1022,13 @@ xfs_readlink( goto error_return; } - if (!(ioflags & IO_INVIS)) { - xfs_ichgtime(ip, XFS_ICHGTIME_ACC); - } - /* * See if the symlink is stored inline. */ pathlen = (int)ip->i_d.di_size; if (ip->i_df.if_flags & XFS_IFINLINE) { - error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); + error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); } else { /* @@ -1034,7 +1037,7 @@ xfs_readlink( nmaps = SYMLINK_MAPS; error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), - 0, NULL, 0, mval, &nmaps, NULL); + 0, NULL, 0, mval, &nmaps, NULL, NULL); if (error) { goto error_return; @@ -1056,17 +1059,14 @@ xfs_readlink( byte_cnt = pathlen; pathlen -= byte_cnt; - error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); + error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); xfs_buf_relse (bp); } } - error_return: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; } @@ -1091,6 +1091,7 @@ xfs_fsync( xfs_inode_t *ip; xfs_trans_t *tp; int error; + int log_flushed = 0, changed = 1; vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, (inst_t *)__return_address); @@ -1144,10 +1145,18 @@ xfs_fsync( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, + _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE | ((flag & FSYNC_WAIT) - ? XFS_LOG_SYNC : 0)); + ? XFS_LOG_SYNC : 0), + &log_flushed); + } else { + /* + * If the inode is not pinned and nothing + * has changed we don't need to flush the + * cache. + */ + changed = 0; } error = 0; } else { @@ -1183,10 +1192,27 @@ xfs_fsync( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (flag & FSYNC_WAIT) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } + + if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { + /* + * If the log write didn't issue an ordered tag we need + * to flush the disk cache for the data device now. + */ + if (!log_flushed) + xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); + + /* + * If this inode is on the RT dev we need to flush that + * cache as well. + */ + if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) + xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); + } + return error; } @@ -1215,21 +1241,22 @@ xfs_inactive_free_eofblocks( last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) - return (0); + return 0; nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && - (imap.br_startblock != HOLESTARTBLOCK)) { + (imap.br_startblock != HOLESTARTBLOCK || + ip->i_delayed_blks)) { /* * Attach the dquots to the inode up front. */ if ((error = XFS_QM_DQATTACH(mp, ip, 0))) - return (error); + return error; /* * There are blocks after the end of file. @@ -1257,7 +1284,7 @@ xfs_inactive_free_eofblocks( ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1285,7 +1312,7 @@ xfs_inactive_free_eofblocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); } - return (error); + return error; } /* @@ -1345,10 +1372,10 @@ xfs_inactive_symlink_rmt( */ done = 0; XFS_BMAP_INIT(&free_list, &first_block); - nmaps = sizeof(mval) / sizeof(mval[0]); + nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list))) + &free_list, NULL))) goto error0; /* * Invalidate the block(s). @@ -1363,7 +1390,7 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) + &first_block, &free_list, NULL, &done))) goto error1; ASSERT(done); /* @@ -1390,7 +1417,7 @@ xfs_inactive_symlink_rmt( */ ntp = xfs_trans_dup(tp); /* - * Commit the transaction containing extent freeing and EFD's. + * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. @@ -1463,7 +1490,7 @@ xfs_inactive_symlink_local( if (error) { xfs_trans_cancel(*tpp, 0); *tpp = NULL; - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); @@ -1476,12 +1503,9 @@ xfs_inactive_symlink_local( XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); } - return (0); + return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1502,7 +1526,7 @@ xfs_inactive_attrs( if (error) { *tpp = NULL; xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); /* goto out*/ + return error; /* goto out */ } tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); @@ -1515,7 +1539,7 @@ xfs_inactive_attrs( xfs_trans_cancel(tp, 0); *tpp = NULL; xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (error); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1526,7 +1550,7 @@ xfs_inactive_attrs( ASSERT(ip->i_d.di_anextents == 0); *tpp = tp; - return (0); + return 0; } STATIC int @@ -1534,16 +1558,16 @@ xfs_release( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; xfs_mount_t *mp; int error; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; - if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ if (vp->v_vfsp->vfs_flag & VFS_RDONLY) @@ -1555,17 +1579,17 @@ xfs_release( return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && + ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && - (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) { + (!(ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { if ((error = xfs_inactive_free_eofblocks(mp, ip))) - return (error); + return error; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } } @@ -1587,8 +1611,8 @@ xfs_inactive( cred_t *credp) { xfs_inode_t *ip; - vnode_t *vp; - xfs_bmap_free_t free_list; + bhv_vnode_t *vp; + xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; xfs_trans_t *tp; @@ -1605,7 +1629,7 @@ xfs_inactive( * If the inode is already free, then there can be nothing * to clean up here. */ - if (ip->i_d.di_mode == 0) { + if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); return VN_INACTIVE_CACHE; @@ -1618,7 +1642,8 @@ xfs_inactive( * only one with a reference to the inode. */ truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) && + ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || + (ip->i_delayed_blks > 0)) && ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); mp = ip->i_mount; @@ -1636,14 +1661,16 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && - (ip->i_df.if_flags & XFS_IFEXTENTS)) && - (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) || - (ip->i_delayed_blks != 0))) { + ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ip->i_delayed_blks > 0)) && + (ip->i_df.if_flags & XFS_IFEXTENTS) && + (!(ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || + (ip->i_delayed_blks != 0)))) { if ((error = xfs_inactive_free_eofblocks(mp, ip))) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ - LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, + vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); } goto out; @@ -1652,7 +1679,7 @@ xfs_inactive( ASSERT(ip->i_d.di_nlink == 0); if ((error = XFS_QM_DQATTACH(mp, ip, 0))) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); if (truncate) { @@ -1675,7 +1702,7 @@ xfs_inactive( ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1696,7 +1723,7 @@ xfs_inactive( xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { @@ -1710,7 +1737,7 @@ xfs_inactive( if (error) { ASSERT(tp == NULL); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); @@ -1723,7 +1750,7 @@ xfs_inactive( if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); @@ -1745,7 +1772,7 @@ xfs_inactive( * cancelled, and the inode is unlocked. Just get out. */ if (error) - return (VN_INACTIVE_CACHE); + return VN_INACTIVE_CACHE; } else if (ip->i_afp) { xfs_idestroy_fork(ip, XFS_ATTR_FORK); } @@ -1765,7 +1792,7 @@ xfs_inactive( cmn_err(CE_NOTE, "xfs_inactive: xfs_ifree() returned an error = %d on %s", error, mp->m_fsname); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } else { @@ -1800,17 +1827,17 @@ xfs_inactive( STATIC int xfs_lookup( bhv_desc_t *dir_bdp, - vname_t *dentry, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vnode_t **vpp, int flags, - vnode_t *rdir, + bhv_vnode_t *rdir, cred_t *credp) { xfs_inode_t *dp, *ip; xfs_ino_t e_inum; int error; uint lock_mode; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; dir_vp = BHV_TO_VNODE(dir_bdp); vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); @@ -1831,23 +1858,21 @@ xfs_lookup( } -#define XFS_CREATE_NEW_MAXTRIES 10000 - /* * xfs_create (create a new file). */ STATIC int xfs_create( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *name = VNAME(dentry); - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; xfs_inode_t *dp, *ip; - vnode_t *vp=NULL; + bhv_vnode_t *vp = NULL; xfs_trans_t *tp; xfs_mount_t *mp; xfs_dev_t rdev; @@ -1871,7 +1896,7 @@ xfs_create( dp = XFS_BHVTOI(dir_bdp); mp = dp->i_mount; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; namelen = VNAMELEN(dentry); if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { @@ -1891,7 +1916,9 @@ xfs_create( /* Return through std_return after this point. */ udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -1900,7 +1927,7 @@ xfs_create( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -1943,12 +1970,10 @@ xfs_create( if (error) goto error_return; - if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) + if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 1, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, rdev, credp, prid, resblks > 0, &ip, &committed); if (error) { @@ -1976,9 +2001,9 @@ xfs_create( xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); dp_joined_to_trans = B_TRUE; - error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto abort_return; @@ -1991,7 +2016,7 @@ xfs_create( * create transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -2029,10 +2054,10 @@ xfs_create( XFS_QM_DQRELE(mp, gdqp); /* - * Propogate the fact that the vnode changed after the + * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3); *vpp = vp; @@ -2053,8 +2078,8 @@ std_return: abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ - error_return: + error_return: if (tp != NULL) xfs_trans_cancel(tp, cancel_flags); @@ -2113,7 +2138,7 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - vname_t *dentry, + bhv_vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2327,10 +2352,10 @@ int remove_which_error_return = 0; STATIC int xfs_remove( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; char *name = VNAME(dentry); xfs_inode_t *dp, *ip; xfs_trans_t *tp = NULL; @@ -2356,10 +2381,15 @@ xfs_remove( namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &ip)) { + dm_di_mode = ip->i_d.di_mode; + IRELE(ip); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return error; } @@ -2376,7 +2406,7 @@ xfs_remove( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &ip); @@ -2450,17 +2480,12 @@ xfs_remove( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); } - if ((error = _MAC_XFS_IACCESS(ip, MACWRITE, credp))) { - REMOVE_DEBUG_TRACE(__LINE__); - goto error_return; - } - /* * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, 0); + error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, 0); if (error) { ASSERT(error != ENOENT); REMOVE_DEBUG_TRACE(__LINE__); @@ -2493,7 +2518,7 @@ xfs_remove( * remove transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -2522,7 +2547,7 @@ xfs_remove( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); + bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); IRELE(ip); @@ -2540,8 +2565,6 @@ xfs_remove( error1: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; - - error_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; @@ -2577,8 +2600,8 @@ xfs_remove( STATIC int xfs_link( bhv_desc_t *target_dir_bdp, - vnode_t *src_vp, - vname_t *dentry, + bhv_vnode_t *src_vp, + bhv_vname_t *dentry, cred_t *credp) { xfs_inode_t *tdp, *sip; @@ -2590,8 +2613,7 @@ xfs_link( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *target_dir_vp; - bhv_desc_t *src_bdp; + bhv_vnode_t *target_dir_vp; int resblks; char *target_name = VNAME(dentry); int target_namelen; @@ -2601,21 +2623,9 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (src_vp->v_type == VDIR) - return XFS_ERROR(EPERM); + ASSERT(!VN_ISDIR(src_vp)); - /* - * For now, manually find the XFS behavior descriptor for - * the source vnode. If it doesn't exist then something - * is wrong and we should just return an error. - * Eventually we need to figure out how link is going to - * work in the face of stacked vnodes. - */ - src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); - if (src_bdp == NULL) { - return XFS_ERROR(EXDEV); - } - sip = XFS_BHVTOI(src_bdp); + sip = xfs_vtoi(src_vp); tdp = XFS_BHVTOI(target_dir_bdp); mp = tdp->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) @@ -2681,14 +2691,24 @@ xfs_link( goto error_return; } + /* + * If we are using project inheritance, we only allow hard link + * creation in our tree when the project IDs are the same; else + * the tree quota mechanism could be circumvented. + */ + if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (tdp->i_d.di_projid != sip->i_d.di_projid))) { + error = XFS_ERROR(EXDEV); + goto error_return; + } + if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, - target_namelen))) + (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) goto error_return; XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, + error = xfs_dir_createname(tp, tdp, target_name, target_namelen, sip->i_ino, &first_block, &free_list, resblks); if (error) @@ -2698,16 +2718,15 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); - if (error) { + if (error) goto abort_return; - } /* * If this is a synchronous mount, make sure that the * link transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -2718,9 +2737,8 @@ xfs_link( } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + if (error) goto std_return; - } /* Fall through to std_return with error = 0. */ std_return: @@ -2736,11 +2754,13 @@ std_return: abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ + error_return: xfs_trans_cancel(tp, cancel_flags); - goto std_return; } + + /* * xfs_mkdir * @@ -2748,15 +2768,15 @@ std_return: STATIC int xfs_mkdir( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *dir_name = VNAME(dentry); xfs_inode_t *dp; xfs_inode_t *cdp; /* inode of created dir */ - vnode_t *cvp; /* vnode of created dir */ + bhv_vnode_t *cvp; /* vnode of created dir */ xfs_trans_t *tp; xfs_mount_t *mp; int cancel_flags; @@ -2764,7 +2784,7 @@ xfs_mkdir( int committed; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; boolean_t dp_joined_to_trans; boolean_t created = B_FALSE; int dm_event_sent = 0; @@ -2785,7 +2805,7 @@ xfs_mkdir( tp = NULL; dp_joined_to_trans = B_FALSE; - dm_di_mode = vap->va_mode|VTTOIF(vap->va_type); + dm_di_mode = vap->va_mode; if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, @@ -2803,7 +2823,9 @@ xfs_mkdir( mp = dp->i_mount; udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -2812,7 +2834,7 @@ xfs_mkdir( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2852,13 +2874,12 @@ xfs_mkdir( goto error_return; if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) + (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) goto error_return; /* * create the directory inode. */ - error = xfs_dir_ialloc(&tp, dp, - MAKEIMODE(vap->va_type,vap->va_mode), 2, + error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 0, credp, prid, resblks > 0, &cdp, NULL); if (error) { @@ -2880,9 +2901,9 @@ xfs_mkdir( XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, - cdp->i_ino, &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto error1; @@ -2896,16 +2917,14 @@ xfs_mkdir( */ dp->i_gen++; - error = XFS_DIR_INIT(mp, tp, cdp, dp); - if (error) { + error = xfs_dir_init(tp, cdp, dp); + if (error) goto error2; - } cdp->i_gen = 1; error = xfs_bumplink(tp, dp); - if (error) { + if (error) goto error2; - } cvp = XFS_ITOV(cdp); @@ -2924,7 +2943,7 @@ xfs_mkdir( * mkdir transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -2982,7 +3001,7 @@ std_return: STATIC int xfs_rmdir( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { char *name = VNAME(dentry); @@ -2995,8 +3014,8 @@ xfs_rmdir( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *dir_vp; - int dm_di_mode = 0; + bhv_vnode_t *dir_vp; + int dm_di_mode = S_IFDIR; int last_cdp_link; int namelen; uint resblks; @@ -3011,11 +3030,16 @@ xfs_rmdir( return XFS_ERROR(EIO); namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &cdp)) { + dm_di_mode = cdp->i_d.di_mode; + IRELE(cdp); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return XFS_ERROR(error); } @@ -3033,7 +3057,7 @@ xfs_rmdir( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &cdp); @@ -3109,25 +3133,20 @@ xfs_rmdir( ITRACE(cdp); xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); - if ((error = _MAC_XFS_IACCESS(cdp, MACWRITE, credp))) { - goto error_return; - } - ASSERT(cdp->i_d.di_nlink >= 2); if (cdp->i_d.di_nlink != 2) { error = XFS_ERROR(ENOTEMPTY); goto error_return; } - if (!XFS_DIR_ISEMPTY(mp, cdp)) { + if (!xfs_dir_isempty(cdp)) { error = XFS_ERROR(ENOTEMPTY); goto error_return; } - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, - &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3175,7 +3194,7 @@ xfs_rmdir( * rmdir transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -3198,13 +3217,13 @@ xfs_rmdir( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); + bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); IRELE(cdp); /* Fall through to std_return with error = 0 or the errno * from xfs_trans_commit. */ -std_return: + std_return: if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dir_vp, DM_RIGHT_NULL, @@ -3217,6 +3236,8 @@ std_return: error1: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; + /* FALLTHROUGH */ + error_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; @@ -3224,8 +3245,6 @@ std_return: /* - * xfs_readdir - * * Read dp's entries starting at uiop->uio_offset and translate them into * bufsize bytes worth of struct dirents starting at bufbase. */ @@ -3240,38 +3259,28 @@ xfs_readdir( xfs_trans_t *tp = NULL; int error = 0; uint lock_mode; - xfs_off_t start_offset; vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, (inst_t *)__return_address); dp = XFS_BHVTOI(dir_bdp); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); - } lock_mode = xfs_ilock_map_shared(dp); - start_offset = uiop->uio_offset; - error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); - if (start_offset != uiop->uio_offset) { - xfs_ichgtime(dp, XFS_ICHGTIME_ACC); - } + error = xfs_dir_getdents(tp, dp, uiop, eofp); xfs_iunlock_map_shared(dp, lock_mode); return error; } -/* - * xfs_symlink - * - */ STATIC int xfs_symlink( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, + bhv_vname_t *dentry, + bhv_vattr_t *vap, char *target_path, - vnode_t **vpp, + bhv_vnode_t **vpp, cred_t *credp) { xfs_trans_t *tp; @@ -3283,7 +3292,7 @@ xfs_symlink( xfs_bmap_free_t free_list; xfs_fsblock_t first_block; boolean_t dp_joined_to_trans; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -3328,7 +3337,7 @@ xfs_symlink( int len, total; char *path; - for(total = 0, path = target_path; total < pathlen;) { + for (total = 0, path = target_path; total < pathlen;) { /* * Skip any slashes. */ @@ -3361,7 +3370,9 @@ xfs_symlink( /* Return through std_return after this point. */ udqp = gdqp = NULL; - if (vap->va_mask & XFS_AT_PROJID) + if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + prid = dp->i_d.di_projid; + else if (vap->va_mask & XFS_AT_PROJID) prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -3370,7 +3381,7 @@ xfs_symlink( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), + current_fsuid(credp), current_fsgid(credp), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -3401,6 +3412,14 @@ xfs_symlink( xfs_ilock(dp, XFS_ILOCK_EXCL); + /* + * Check whether the directory allows new symlinks or not. + */ + if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { + error = XFS_ERROR(EPERM); + goto error_return; + } + /* * Reserve disk quota : blocks and inode. */ @@ -3412,7 +3431,7 @@ xfs_symlink( * Check for ability to enter directory entry, if no space reserved. */ if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) + (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) goto error_return; /* * Initialize the bmap freelist prior to calling either @@ -3467,7 +3486,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list); + &free_list, NULL); if (error) { goto error1; } @@ -3499,11 +3518,10 @@ xfs_symlink( /* * Create the directory entry for the symlink. */ - error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, - ip->i_ino, &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -3519,7 +3537,7 @@ xfs_symlink( * symlink transaction goes to disk before returning to * the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } @@ -3551,7 +3569,7 @@ std_return: } if (!error) { - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ip); vp = XFS_ITOV(ip); @@ -3616,21 +3634,21 @@ xfs_fid2( int xfs_rwlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return 1; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { xfs_ilock(ip, XFS_IOLOCK_EXCL); } else if (locktype == VRWLOCK_TRY_READ) { - return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)); + return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); } else if (locktype == VRWLOCK_TRY_WRITE) { - return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)); + return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); } else { ASSERT((locktype == VRWLOCK_READ) || (locktype == VRWLOCK_WRITE_DIRECT)); @@ -3647,13 +3665,13 @@ xfs_rwlock( void xfs_rwunlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); - if (vp->v_type == VDIR) + if (VN_ISDIR(vp)) return; ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { @@ -3680,27 +3698,27 @@ xfs_inode_flush( { xfs_inode_t *ip; xfs_mount_t *mp; + xfs_inode_log_item_t *iip; int error = 0; ip = XFS_BHVTOI(bdp); mp = ip->i_mount; + iip = ip->i_itemp; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - /* Bypass inodes which have already been cleaned by + /* + * Bypass inodes which have already been cleaned by * the inode flush clustering code inside xfs_iflush */ if ((ip->i_update_core == 0) && - ((ip->i_itemp == NULL) || - !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL))) + ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) return 0; if (flags & FLUSH_LOG) { - xfs_inode_log_item_t *iip = ip->i_itemp; - if (iip && iip->ili_last_lsn) { - xlog_t *log = mp->m_log; + xlog_t *log = mp->m_log; xfs_lsn_t sync_lsn; int s, log_flags = XFS_LOG_FORCE; @@ -3713,12 +3731,12 @@ xfs_inode_flush( if (flags & FLUSH_SYNC) log_flags |= XFS_LOG_SYNC; - return xfs_log_force(mp, iip->ili_last_lsn, - log_flags); + return xfs_log_force(mp, iip->ili_last_lsn, log_flags); } } - /* We make this non-blocking if the inode is contended, + /* + * We make this non-blocking if the inode is contended, * return EAGAIN to indicate to the caller that they * did not succeed. This prevents the flush path from * blocking on inodes inside another operation right @@ -3754,7 +3772,6 @@ xfs_inode_flush( return error; } - int xfs_set_dmattrs ( bhv_desc_t *bdp, @@ -3795,75 +3812,46 @@ xfs_set_dmattrs ( return error; } - -/* - * xfs_reclaim - */ STATIC int xfs_reclaim( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); + ip = XFS_BHVTOI(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); ASSERT(!VN_MAPPED(vp)); - ip = XFS_BHVTOI(bdp); - if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (ip->i_d.di_size > 0) { - /* - * Flush and invalidate any data left around that is - * a part of this file. - * - * Get the inode's i/o lock so that buffers are pushed - * out while holding the proper lock. We can't hold - * the inode lock here since flushing out buffers may - * cause us to try to get the lock in xfs_strategy(). - * - * We don't have to call remapf() here, because there - * cannot be any mapped file references to this vnode - * since it is being reclaimed. - */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); + /* bad inode, get out here ASAP */ + if (VN_BAD(vp)) { + xfs_ireclaim(ip); + return 0; + } - /* - * If we hit an IO error, we need to make sure that the - * buffer and page caches of file data for - * the file are tossed away. We don't want to use - * VOP_FLUSHINVAL_PAGES here because we don't want dirty - * pages to stay attached to the vnode, but be - * marked P_BAD. pdflush/vnode_pagebad - * hates that. - */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE); - } else { - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - } + vn_iowait(vp); - ASSERT(VN_CACHED(vp) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || - ip->i_delayed_blks == 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - /* - * di_size field may not be quite accurate if we're - * shutting down. - */ - VOP_TOSS_PAGES(vp, 0, -1, FI_NONE); - ASSERT(VN_CACHED(vp) == 0); - } - } + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - /* If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode - * and the linux inode and clean up the xfs inode later. This - * avoids flushing the inode to disk during the delete operation - * itself. + /* + * Make sure the atime in the XFS inode is correct before freeing the + * Linux inode. + */ + xfs_synchronize_atime(ip); + + /* + * If we have nothing to flush with this inode then complete the + * teardown now, otherwise break the link between the xfs inode and the + * linux inode and clean up the xfs inode later. This avoids flushing + * the inode to disk during the delete operation itself. + * + * When breaking the link, we need to set the XFS_IRECLAIMABLE flag + * first to ensure that xfs_iunpin() will never see an xfs inode + * that has a linux inode being reclaimed. Synchronisation is provided + * by the i_flags_lock. */ if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -3872,11 +3860,13 @@ xfs_reclaim( } else { xfs_mount_t *mp = ip->i_mount; - /* Protect sync from us */ + /* Protect sync and unpin from us */ XFS_MOUNT_ILOCK(mp); + spin_lock(&ip->i_flags_lock); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); + spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - ip->i_flags |= XFS_IRECLAIMABLE; XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3889,25 +3879,31 @@ xfs_finish_reclaim( int sync_mode) { xfs_ihash_t *ih = ip->i_hash; + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); int error; + if (vp && VN_BAD(vp)) + goto reclaim; + /* The hash lock here protects a thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. * Once we have the XFS_IRECLAIM flag set it will not touch * us. */ write_lock(&ih->ih_lock); - if ((ip->i_flags & XFS_IRECLAIM) || - (!(ip->i_flags & XFS_IRECLAIMABLE) && - (XFS_ITOV_NULL(ip) == NULL))) { + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); if (locked) { xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } - return(1); + return 1; } - ip->i_flags |= XFS_IRECLAIM; + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); /* @@ -3938,8 +3934,7 @@ xfs_finish_reclaim( */ if (error) { xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return (0); + goto reclaim; } xfs_iflock(ip); /* synchronize with xfs_iflush_done */ } @@ -3958,6 +3953,7 @@ xfs_finish_reclaim( xfs_iunlock(ip, XFS_ILOCK_EXCL); } + reclaim: xfs_ireclaim(ip); return 0; } @@ -3966,15 +3962,13 @@ int xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) { int purged; - struct list_head *curr, *next; - xfs_inode_t *ip; + xfs_inode_t *ip, *n; int done = 0; while (!done) { purged = 0; XFS_MOUNT_ILOCK(mp); - list_for_each_safe(curr, next, &mp->m_del_inodes) { - ip = list_entry(curr, xfs_inode_t, i_reclaim); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { if (noblock) { if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) continue; @@ -3985,8 +3979,9 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) } } XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, noblock, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); + if (xfs_finish_reclaim(ip, noblock, + XFS_IFLUSH_DELWRI_ELSE_ASYNC)) + delay(1); purged = 1; break; } @@ -4017,7 +4012,7 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) * errno on error * */ -int +STATIC int xfs_alloc_file_space( xfs_inode_t *ip, xfs_off_t offset, @@ -4025,42 +4020,36 @@ xfs_alloc_file_space( int alloc_type, int attr_flags) { + xfs_mount_t *mp = ip->i_mount; + xfs_off_t count; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; - int committed; - xfs_off_t count; - xfs_filblks_t datablocks; - int error; + xfs_extlen_t extsz, temp; + xfs_fileoff_t startoffset_fsb; xfs_fsblock_t firstfsb; - xfs_bmap_free_t free_list; - xfs_bmbt_irec_t *imapp; - xfs_bmbt_irec_t imaps[1]; - xfs_mount_t *mp; - int numrtextents; - int reccount; - uint resblks; + int nimaps; + int bmapi_flag; + int quota_flag; int rt; - int rtextsize; - xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; - int xfs_bmapi_flags; + xfs_bmbt_irec_t imaps[1], *imapp; + xfs_bmap_free_t free_list; + uint qblocks, resblks, resrtextents; + int committed; + int error; vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); - mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - /* - * determine if this is a realtime file - */ - if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) { - if (ip->i_d.di_extsize) - rtextsize = ip->i_d.di_extsize; - else - rtextsize = mp->m_sb.sb_rextsize; - } else - rtextsize = 0; + rt = XFS_IS_REALTIME_INODE(ip); + if (unlikely(rt)) { + if (!(extsz = ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + } else { + extsz = ip->i_d.di_extsize; + } if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4071,8 +4060,8 @@ xfs_alloc_file_space( count = len; error = 0; imapp = &imaps[0]; - reccount = 1; - xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); + nimaps = 1; + bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); @@ -4089,47 +4078,55 @@ xfs_alloc_file_space( offset, end_dmi_offset - offset, 0, NULL); if (error) - return(error); + return error; } /* - * allocate file space until done or until there is an error + * Allocate file space until done or until there is an error */ retry: while (allocatesize_fsb && !error) { + xfs_fileoff_t s, e; + /* - * determine if reserving space on - * the data or realtime partition. + * Determine space reservations for data/realtime. */ - if (rt) { - xfs_fileoff_t s, e; - + if (unlikely(extsz)) { s = startoffset_fsb; - do_div(s, rtextsize); - s *= rtextsize; - e = roundup_64(startoffset_fsb + allocatesize_fsb, - rtextsize); - numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize; - datablocks = 0; + do_div(s, extsz); + s *= extsz; + e = startoffset_fsb + allocatesize_fsb; + if ((temp = do_mod(startoffset_fsb, extsz))) + e += temp; + if ((temp = do_mod(e, extsz))) + e += extsz - temp; + } else { + s = 0; + e = allocatesize_fsb; + } + + if (unlikely(rt)) { + resrtextents = qblocks = (uint)(e - s); + resrtextents /= mp->m_sb.sb_rextsize; + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + quota_flag = XFS_QMOPT_RES_RTBLKS; } else { - datablocks = allocatesize_fsb; - numrtextents = 0; + resrtextents = 0; + resblks = qblocks = \ + XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); + quota_flag = XFS_QMOPT_RES_REGBLKS; } /* - * allocate and setup the transaction + * Allocate and setup the transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); - error = xfs_trans_reserve(tp, - resblks, - XFS_WRITE_LOG_RES(mp), - numrtextents, + error = xfs_trans_reserve(tp, resblks, + XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); - /* - * check for running out of space + * Check for running out of space */ if (error) { /* @@ -4140,9 +4137,8 @@ retry: break; } xfs_ilock(ip, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, + qblocks, 0, quota_flag); if (error) goto error1; @@ -4150,19 +4146,19 @@ retry: xfs_trans_ihold(tp, ip); /* - * issue the bmapi() call to allocate the blocks + * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, - allocatesize_fsb, xfs_bmapi_flags, - &firstfsb, 0, imapp, &reccount, - &free_list); + error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, + allocatesize_fsb, bmapi_flag, + &firstfsb, 0, imapp, &nimaps, + &free_list, NULL); if (error) { goto error0; } /* - * complete the transaction + * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); if (error) { @@ -4177,7 +4173,7 @@ retry: allocated_fsb = imapp->br_blockcount; - if (reccount == 0) { + if (nimaps == 0) { error = XFS_ERROR(ENOSPC); break; } @@ -4200,9 +4196,11 @@ dmapi_enospc_check: return error; - error0: +error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); - error1: + XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); + +error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto dmapi_enospc_check; @@ -4234,8 +4232,8 @@ xfs_zero_remaining_bytes( for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, - &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -4294,6 +4292,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { + bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4308,14 +4307,17 @@ xfs_free_file_space( xfs_mount_t *mp; int nimap; uint resblks; - int rounding; + uint rounding; int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; + int need_iolock = 1; - vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); + vp = XFS_ITOV(ip); mp = ip->i_mount; + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4332,21 +4334,33 @@ xfs_free_file_space( DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { if (end_dmi_offset > ip->i_d.di_size) end_dmi_offset = ip->i_d.di_size; - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); if (error) - return(error); + return error; + } + + if (attr_flags & ATTR_NOLOCK) + need_iolock = 0; + if (need_iolock) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + vn_iowait(vp); /* wait for the completion of any pending DIOs */ } - xfs_ilock(ip, XFS_IOLOCK_EXCL); - rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), - (__uint8_t)NBPP); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); ilen = len + (offset & (rounding - 1)); ioffset = offset & ~(rounding - 1); if (ilen & (rounding - 1)) ilen = (ilen + rounding) & ~(rounding - 1); - xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0); + + if (VN_CACHED(vp) != 0) { + xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, + ctooff(offtoct(ioffset)), -1); + bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), + -1, FI_REMAPF_LOCKED); + } + /* * Need to zero the stuff we're not freeing, on disk. * If its a realtime file & can't use unwritten extents then we @@ -4355,10 +4369,10 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) - return error; + goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; @@ -4370,10 +4384,10 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) - return error; + goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); @@ -4431,8 +4445,8 @@ xfs_free_file_space( } xfs_ilock(ip, XFS_ILOCK_EXCL); error = XFS_TRANS_RESERVE_QUOTA(mp, tp, - ip->i_udquot, ip->i_gdquot, resblks, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + ip->i_udquot, ip->i_gdquot, resblks, 0, + XFS_QMOPT_RES_REGBLKS); if (error) goto error1; @@ -4443,9 +4457,9 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, + error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, &done); + 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { goto error0; } @@ -4462,14 +4476,17 @@ xfs_free_file_space( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + out_unlock_iolock: + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : + XFS_ILOCK_EXCL); return error; } @@ -4502,8 +4519,8 @@ xfs_change_file_space( xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - vattr_t va; - vnode_t *vp; + bhv_vattr_t va; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -4514,7 +4531,7 @@ xfs_change_file_space( /* * must be a regular file and have write permission */ - if (vp->v_type != VREG) + if (!VN_ISREG(vp)) return XFS_ERROR(EINVAL); xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -4626,20 +4643,21 @@ xfs_change_file_space( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); - ip->i_d.di_mode &= ~S_ISUID; - - /* - * Note that we don't have to worry about mandatory - * file locking being disabled here because we only - * clear the S_ISGID bit if the Group execute bit is - * on, but if it was on then mandatory locking wouldn't - * have been enabled. - */ - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; + if ((attr_flags & ATTR_DMI) == 0) { + ip->i_d.di_mode &= ~S_ISUID; - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + /* + * Note that we don't have to worry about mandatory + * file locking being disabled here because we only + * clear the S_ISGID bit if the Group execute bit is + * on, but if it was on then mandatory locking wouldn't + * have been enabled. + */ + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } if (setprealloc) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; else if (clrprealloc) @@ -4655,12 +4673,17 @@ xfs_change_file_space( return error; } -vnodeops_t xfs_vnodeops = { +bhv_vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, + .vop_close = xfs_close, .vop_read = xfs_read, #ifdef HAVE_SENDFILE .vop_sendfile = xfs_sendfile, +#endif +#ifdef HAVE_SPLICE + .vop_splice_read = xfs_splice_read, + .vop_splice_write = xfs_splice_write, #endif .vop_write = xfs_write, .vop_ioctl = xfs_ioctl,