X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_inode.c;h=0b56b0034182deab0c5b62987001ff0ca3a830c3;hb=refs%2Fheads%2Fvserver;hp=33f589f5eac2d9bc2e90c23050584f076f722c19;hpb=43bc926fffd92024b46cafaf7350d669ba9ca884;p=linux-2.6.git diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 33f589f5e..0b56b0034 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -26,14 +26,12 @@ #include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -52,7 +50,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" -#include +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -257,13 +255,11 @@ xfs_itobp( xfs_daddr_t bno, uint imap_flags) { + xfs_imap_t imap; xfs_buf_t *bp; int error; - xfs_imap_t imap; -#ifdef __KERNEL__ int i; int ni; -#endif if (ip->i_blkno == (xfs_daddr_t)0) { /* @@ -320,7 +316,6 @@ xfs_itobp( */ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, (int)imap.im_len, XFS_BUF_LOCK, &bp); - if (error) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " @@ -331,17 +326,20 @@ xfs_itobp( #endif /* DEBUG */ return error; } -#ifdef __KERNEL__ + /* * Validate the magic number and version of every inode in the buffer * (if DEBUG kernel) or the first inode in the buffer, otherwise. + * No validation is done here in userspace (xfs_repair). */ -#ifdef DEBUG - ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : - (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); -#else - ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 
0 : 1; +#if !defined(__KERNEL__) + ni = 0; +#elif defined(DEBUG) + ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; +#else /* usual case */ + ni = 1; #endif + for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; @@ -350,11 +348,18 @@ xfs_itobp( (i << mp->m_sb.sb_inodelog)); di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, + XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { + if (imap_flags & XFS_IMAP_BULKSTAT) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EINVAL); + } #ifdef DEBUG - prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", - mp->m_ddev_targp, + cmn_err(CE_ALERT, + "Device %s - bad inode magic/vsn " + "daddr %lld #%d (magic=%x)", + XFS_BUFTARG_NAME(mp->m_ddev_targp), (unsigned long long)imap.im_blkno, i, INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); #endif @@ -364,7 +369,6 @@ xfs_itobp( return XFS_ERROR(EFSCORRUPTED); } } -#endif /* __KERNEL__ */ xfs_inobp_check(mp, bp); @@ -734,16 +738,16 @@ xfs_xlate_dinode_core( xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; uint32_t uid = 0, gid = 0; - uint16_t xid = 0; + uint16_t tag = 0; ASSERT(dir); if (dir < 0) { - xid = mem_core->di_xid; + tag = mem_core->di_tag; /* FIXME: supposed to use superblock flag */ - uid = XIDINO_UID(1, mem_core->di_uid, xid); - gid = XIDINO_GID(1, mem_core->di_gid, xid); - xid = XIDINO_XID(1, xid); + uid = TAGINO_UID(1, mem_core->di_uid, tag); + gid = TAGINO_GID(1, mem_core->di_gid, tag); + tag = TAGINO_TAG(1, tag); } INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); @@ -753,15 +757,15 @@ xfs_xlate_dinode_core( INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); INT_XLATE(buf_core->di_uid, uid, dir, arch); INT_XLATE(buf_core->di_gid, gid, dir, arch); - INT_XLATE(buf_core->di_xid, xid, dir, arch); + INT_XLATE(buf_core->di_tag, tag, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { /* FIXME: supposed to use superblock flag */ - mem_core->di_uid = INOXID_UID(1, uid, gid); - mem_core->di_gid = INOXID_GID(1, uid, gid); - mem_core->di_xid = INOXID_XID(1, uid, gid, xid); + mem_core->di_uid = INOTAG_UID(1, uid, gid); + mem_core->di_gid = INOTAG_GID(1, uid, gid); + mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag); memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); } else { @@ -798,7 +802,6 @@ xfs_xlate_dinode_core( STATIC uint _xfs_dic2xflags( - xfs_dinode_core_t *dic, __uint16_t di_flags) { uint flags = 0; @@ -832,6 +835,8 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_EXTSIZE; if (di_flags & XFS_DIFLAG_EXTSZINHERIT) flags |= XFS_XFLAG_EXTSZINHERIT; + if (di_flags & XFS_DIFLAG_NODEFRAG) + flags |= XFS_XFLAG_NODEFRAG; } return flags; @@ -843,16 +848,16 @@ xfs_ip2xflags( { xfs_dinode_core_t *dic = &ip->i_d; - return _xfs_dic2xflags(dic, dic->di_flags) | - (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(dic->di_flags) | + (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); } uint xfs_dic2xflags( xfs_dinode_core_t *dic) { - return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | - (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) | + (XFS_CFORK_Q_DISK(dic) ? 
XFS_XFLAG_HASATTR : 0); } /* @@ -869,7 +874,8 @@ xfs_iread( xfs_trans_t *tp, xfs_ino_t ino, xfs_inode_t **ipp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; @@ -881,6 +887,7 @@ xfs_iread( ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -889,7 +896,7 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; @@ -1103,7 +1110,7 @@ xfs_ialloc( { xfs_ino_t ino; xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; uint flags; int error; @@ -1128,7 +1135,7 @@ xfs_ialloc( * to prevent others from looking at until we're done. */ error = xfs_trans_iget(tp->t_mountp, tp, ino, - IGET_CREATE, XFS_ILOCK_EXCL, &ip); + XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error != 0) { return error; } @@ -1141,7 +1148,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); - ip->i_d.di_xid = current_fsxid(cr, vp); + ip->i_d.di_tag = current_fstag(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); @@ -1242,6 +1249,9 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NOSYMLINKS; if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && + xfs_inherit_nodefrag) + di_flags |= XFS_DIFLAG_NODEFRAG; ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ @@ -1265,8 +1275,8 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have an i_mode we can set Linux inode ops (& unlock) */ - VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); + /* now that we have an i_mode we can setup inode ops and unlock */ + bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; return 0; @@ -1306,7 +1316,7 @@ xfs_isize_check( (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, - NULL)) + NULL, NULL)) return; ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); @@ -1442,7 +1452,7 @@ xfs_itruncate_start( xfs_fsize_t last_byte; xfs_off_t toss_start; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); @@ -1455,9 +1465,9 @@ xfs_itruncate_start( vn_iowait(vp); /* wait for the completion of any pending DIOs */ /* - * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers + * Call toss_pages or flushinval_pages to get rid of pages * overlapping the region being removed. We have to use - * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the + * the less efficient flushinval_pages in the case that the * caller may not be able to finish the truncate without * dropping the inode's I/O lock. Make sure * to catch any pages brought in by buffers overlapping @@ -1466,10 +1476,10 @@ xfs_itruncate_start( * so that we don't toss things on the same block as * new_size but before it. * - * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to + * Before calling toss_page or flushinval_pages, make sure to * call remapf() over the same region if the file is mapped. 
* This frees up mapped file references to the pages in the - * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures + * given range and for the flushinval_pages case it ensures * that we get the latest mapped changes flushed out. */ toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); @@ -1487,9 +1497,9 @@ xfs_itruncate_start( last_byte); if (last_byte > toss_start) { if (flags & XFS_ITRUNC_DEFINITE) { - VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } else { - VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } } @@ -1687,12 +1697,13 @@ xfs_itruncate_finish( * runs. */ XFS_BMAP_INIT(&free_list, &first_block); - error = xfs_bunmapi(ntp, ip, first_unmap_block, - unmap_len, + error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, + first_unmap_block, unmap_len, XFS_BMAPI_AFLAG(fork) | (sync ? 0 : XFS_BMAPI_ASYNC), XFS_ITRUNC_MAX_EXTENTS, - &first_block, &free_list, &done); + &first_block, &free_list, + NULL, &done); if (error) { /* * If the bunmapi call encounters an error, @@ -1976,9 +1987,9 @@ xfs_iunlink_remove( xfs_agino_t agino; xfs_agino_t next_agino; xfs_buf_t *last_ibp; - xfs_dinode_t *last_dip; + xfs_dinode_t *last_dip = NULL; short bucket_index; - int offset, last_offset; + int offset, last_offset = 0; int error; int agi_ok; @@ -2203,7 +2214,7 @@ xfs_ifree_cluster( /* Inode not in memory or we found it already, * nothing to do */ - if (!ip || (ip->i_flags & XFS_ISTALE)) { + if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { read_unlock(&ih->ih_lock); continue; } @@ -2225,8 +2236,7 @@ xfs_ifree_cluster( if (ip == free_ip) { if (xfs_iflock_nowait(ip)) { - ip->i_flags |= XFS_ISTALE; - + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); } else { @@ -2239,7 +2249,7 @@ xfs_ifree_cluster( if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { if (xfs_iflock_nowait(ip)) { - ip->i_flags |= XFS_ISTALE; + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2269,7 +2279,7 @@ xfs_ifree_cluster( AIL_LOCK(mp,s); iip->ili_flush_lsn = iip->ili_item.li_lsn; AIL_UNLOCK(mp, s); - iip->ili_inode->i_flags |= XFS_ISTALE; + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } lip = lip->li_bio_list; @@ -2752,30 +2762,38 @@ xfs_iunpin( { ASSERT(atomic_read(&ip->i_pincount) > 0); - if (atomic_dec_and_test(&ip->i_pincount)) { + if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { + /* - * If the inode is currently being reclaimed, the - * linux inode _and_ the xfs vnode may have been - * freed so we cannot reference either of them safely. - * Hence we should not try to do anything to them - * if the xfs inode is currently in the reclaim - * path. + * If the inode is currently being reclaimed, the link between + * the bhv_vnode and the xfs_inode will be broken after the + * XFS_IRECLAIM* flag is set. Hence, if these flags are not + * set, then we can move forward and mark the linux inode dirty + * knowing that it is still valid as it won't freed until after + * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The + * i_flags_lock is used to synchronise the setting of the + * XFS_IRECLAIM* flags and the breaking of the link, and so we + * can execute atomically w.r.t to reclaim by holding this lock + * here. * - * However, we still need to issue the unpin wakeup - * call as the inode reclaim may be blocked waiting for - * the inode to become unpinned. 
+ * However, we still need to issue the unpin wakeup call as the + * inode reclaim may be blocked waiting for the inode to become + * unpinned. */ - if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { - vnode_t *vp = XFS_ITOV_NULL(ip); - /* make sync come back and flush this inode */ - if (vp) { - struct inode *inode = vn_to_inode(vp); + if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *inode = NULL; - if (!(inode->i_state & I_NEW)) - mark_inode_dirty_sync(inode); - } + BUG_ON(vp == NULL); + inode = vn_to_inode(vp); + BUG_ON(inode->i_state & I_CLEAR); + + /* make sync come back and flush this inode */ + if (!(inode->i_state & (I_NEW|I_FREEING))) + mark_inode_dirty_sync(inode); } + spin_unlock(&ip->i_flags_lock); wake_up(&ip->i_ipin_wait); } } @@ -2937,13 +2955,6 @@ xfs_iflush_fork( ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); } - if (whichfork == XFS_DATA_FORK) { - if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) { - XFS_ERROR_REPORT("xfs_iflush_fork", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } - } break; case XFS_DINODE_FMT_EXTENTS: @@ -3027,7 +3038,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3220,7 +3231,7 @@ xfs_iflush( corrupt_out: xfs_buf_relse(bp); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_iflush_abort(ip); /* * Unlocks the flush lock @@ -3242,7 +3253,7 @@ cluster_corrupt_out: xfs_buf_relse(bp); } - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); if(!bufwasdelwri) { /* @@ -3285,7 +3296,7 @@ xfs_iflush_int( SPLDECL(s); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3525,7 +3536,7 @@ xfs_iflush_all( xfs_mount_t *mp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; again: XFS_MOUNT_ILOCK(mp); @@ -4201,7 +4212,7 @@ xfs_iext_direct_to_inline( */ memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, nextents * sizeof(xfs_bmbt_rec_t)); - kmem_free(ifp->if_u1.if_extents, KM_SLEEP); + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; }
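The hunks above replace direct updates of ip->i_flags (such as `ip->i_flags |= XFS_ISTALE`) with the xfs_iflags_set()/xfs_iflags_test()/__xfs_iflags_test() helpers and initialise the new i_flags_lock spinlock in xfs_iread(), so that flag changes in xfs_ifree_cluster() and xfs_iunpin() are serialised against reclaim as the long comment in xfs_iunpin() explains. Those helpers are declared in xfs_inode.h rather than in this file, so they do not appear in this diff; the sketch below only illustrates the usual shape of such accessors (locked wrappers around unlocked __-prefixed variants) and is an assumption, not the verbatim header code.

/* Illustrative sketch only -- the real definitions live in xfs_inode.h. */
static inline void __xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
{
	/* caller already holds ip->i_flags_lock */
	ip->i_flags |= flags;
}

static inline void xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
{
	spin_lock(&ip->i_flags_lock);
	__xfs_iflags_set(ip, flags);
	spin_unlock(&ip->i_flags_lock);
}

static inline int __xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
{
	/* caller already holds ip->i_flags_lock */
	return (ip->i_flags & flags);
}

static inline int xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
{
	int ret;

	spin_lock(&ip->i_flags_lock);
	ret = __xfs_iflags_test(ip, flags);
	spin_unlock(&ip->i_flags_lock);
	return ret;
}

Under that reading, the switch in xfs_iunpin() to atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock) makes sense: dropping the last pin and testing XFS_IRECLAIM/XFS_IRECLAIMABLE happen under the same lock, so the bhv_vnode cannot be disconnected from the xfs_inode between the flag test and the mark_inode_dirty_sync() call.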