X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_iget.c;h=acf95355c489c0346c000b08da37c103df33b717;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=07641fc90d61350d834ce2b081382ddbaec32c8e;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 07641fc90..acf95355c 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -1,79 +1,73 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation. * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - #include "xfs.h" - -#include "xfs_macros.h" +#include "xfs_fs.h" #include "xfs_types.h" -#include "xfs_inum.h" +#include "xfs_bit.h" #include "xfs_log.h" +#include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_alloc_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_utils.h" /* * Initialize the inode hash table for the newly mounted file system. - * - * mp -- this is the mount point structure for the file system being - * initialized + * Choose an initial table size based on user specified value, else + * use a simple algorithm using the maximum number of inodes as an + * indicator for table size, and clamp it between one and some large + * number of pages. */ void xfs_ihash_init(xfs_mount_t *mp) { - int i; + __uint64_t icount; + uint i; + + if (!mp->m_ihsize) { + icount = mp->m_maxicount ? mp->m_maxicount : + (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog); + mp->m_ihsize = 1 << max_t(uint, 8, + (xfs_highbit64(icount) + 1) / 2); + mp->m_ihsize = min_t(uint, mp->m_ihsize, + (64 * NBPP) / sizeof(xfs_ihash_t)); + } - mp->m_ihsize = XFS_BUCKETS(mp); - mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize - * sizeof(xfs_ihash_t), KM_SLEEP); - ASSERT(mp->m_ihash != NULL); - for (i = 0; i < mp->m_ihsize; i++) { + mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize, + NBPC * sizeof(xfs_ihash_t), + mp->m_ihsize * sizeof(xfs_ihash_t), + KM_SLEEP | KM_MAYFAIL | KM_LARGE); + mp->m_ihsize /= sizeof(xfs_ihash_t); + for (i = 0; i < mp->m_ihsize; i++) rwlock_init(&(mp->m_ihash[i].ih_lock)); - } } /* @@ -82,35 +76,25 @@ xfs_ihash_init(xfs_mount_t *mp) void xfs_ihash_free(xfs_mount_t *mp) { - kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t)); + kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t)); mp->m_ihash = NULL; } /* * Initialize the inode cluster hash table for the newly mounted file system. - * - * mp -- this is the mount point structure for the file system being - * initialized + * Its size is derived from the ihash table size. */ void xfs_chash_init(xfs_mount_t *mp) { - int i; + uint i; - /* - * m_chash size is based on m_ihash - * with a minimum of 37 entries - */ - mp->m_chsize = (XFS_BUCKETS(mp)) / - (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); - if (mp->m_chsize < 37) { - mp->m_chsize = 37; - } + mp->m_chsize = max_t(uint, 1, mp->m_ihsize / + (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)); + mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize); mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize * sizeof(xfs_chash_t), - KM_SLEEP); - ASSERT(mp->m_chash != NULL); - + KM_SLEEP | KM_LARGE); for (i = 0; i < mp->m_chsize; i++) { spinlock_init(&mp->m_chash[i].ch_lock,"xfshash"); } @@ -132,6 +116,40 @@ xfs_chash_free(xfs_mount_t *mp) mp->m_chash = NULL; } +/* + * Try to move an inode to the front of its hash list if possible + * (and if its not there already). Called right after obtaining + * the list version number and then dropping the read_lock on the + * hash list in question (which is done right after looking up the + * inode in question...). + */ +STATIC void +xfs_ihash_promote( + xfs_ihash_t *ih, + xfs_inode_t *ip, + ulong version) +{ + xfs_inode_t *iq; + + if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) { + if (likely(version == ih->ih_version)) { + /* remove from list */ + if ((iq = ip->i_next)) { + iq->i_prevp = ip->i_prevp; + } + *ip->i_prevp = iq; + + /* insert at list head */ + iq = ih->ih_next; + iq->i_prevp = &ip->i_next; + ip->i_next = iq; + ip->i_prevp = &ih->ih_next; + ih->ih_next = ip; + } + write_unlock(&ih->ih_lock); + } +} + /* * Look up an inode by number in the given file system. * The inode is looked up in the hash table for the file system @@ -165,10 +183,11 @@ xfs_chash_free(xfs_mount_t *mp) */ STATIC int xfs_iget_core( - vnode_t *vp, + bhv_vnode_t *vp, xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, + uint flags, uint lock_flags, xfs_inode_t **ipp, xfs_daddr_t bno) @@ -176,11 +195,10 @@ xfs_iget_core( xfs_ihash_t *ih; xfs_inode_t *ip; xfs_inode_t *iq; - vnode_t *inode_vp; + bhv_vnode_t *inode_vp; ulong version; int error; /* REFERENCED */ - int newnode; xfs_chash_t *ch; xfs_chashlist_t *chl, *chlnew; SPLDECL(s); @@ -193,29 +211,72 @@ again: for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { if (ip->i_ino == ino) { + /* + * If INEW is set this inode is being set up + * we need to pause and try again. + */ + if (xfs_iflags_test(ip, XFS_INEW)) { + read_unlock(&ih->ih_lock); + delay(1); + XFS_STATS_INC(xs_ig_frecycle); - inode_vp = XFS_ITOV_NULL(ip); + goto again; + } + inode_vp = XFS_ITOV_NULL(ip); if (inode_vp == NULL) { - /* If IRECLAIM is set this inode is + /* + * If IRECLAIM is set this inode is * on its way out of the system, * we need to pause and try again. */ - if (ip->i_flags & XFS_IRECLAIM) { + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&ih->ih_lock); delay(1); XFS_STATS_INC(xs_ig_frecycle); goto again; } + ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + + /* + * If lookup is racing with unlink, then we + * should return an error immediately so we + * don't remove it from the reclaim list and + * potentially leak the inode. + */ + if ((ip->i_d.di_mode == 0) && + !(flags & XFS_IGET_CREATE)) { + read_unlock(&ih->ih_lock); + return ENOENT; + } + + /* + * There may be transactions sitting in the + * incore log buffers or being flushed to disk + * at this time. We can't clear the + * XFS_IRECLAIMABLE flag until these + * transactions have hit the disk, otherwise we + * will void the guarantee the flag provides + * xfs_iunpin() + */ + if (xfs_ipincount(ip)) { + read_unlock(&ih->ih_lock); + xfs_log_force(mp, 0, + XFS_LOG_FORCE|XFS_LOG_SYNC); + XFS_STATS_INC(xs_ig_frecycle); + goto again; + } vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); XFS_STATS_INC(xs_ig_found); - ip->i_flags &= ~XFS_IRECLAIMABLE; + xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + version = ih->ih_version; read_unlock(&ih->ih_lock); + xfs_ihash_promote(ih, ip, version); XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); @@ -224,7 +285,7 @@ again: goto finish_inode; } else if (vp != inode_vp) { - struct inode *inode = LINVFS_GET_IP(inode_vp); + struct inode *inode = vn_to_inode(inode_vp); /* The inode is being torn down, pause and * try again. @@ -245,21 +306,28 @@ again: inode_vp, vp); } + /* + * Inode cache hit: if ip is not at the front of + * its hash chain, move it there now. + * Do this with the lock held for update, but + * do statistics after releasing the lock. + */ + version = ih->ih_version; read_unlock(&ih->ih_lock); - + xfs_ihash_promote(ih, ip, version); XFS_STATS_INC(xs_ig_found); finish_inode: - if (lock_flags != 0) { - xfs_ilock(ip, lock_flags); - } - - newnode = (ip->i_d.di_mode == 0); - if (newnode) { + if (ip->i_d.di_mode == 0) { + if (!(flags & XFS_IGET_CREATE)) + return ENOENT; xfs_iocore_inode_reinit(ip); } - ip->i_flags &= ~XFS_ISTALE; + if (lock_flags != 0) + xfs_ilock(ip, lock_flags); + + xfs_iflags_clear(ip, XFS_ISTALE); vn_trace_exit(vp, "xfs_iget.found", (inst_t *)__return_address); goto return_ip; @@ -280,18 +348,22 @@ finish_inode: * Read the disk inode attributes into a new inode structure and get * a new vnode for it. This should also initialize i_ino and i_mount. */ - error = xfs_iread(mp, tp, ino, &ip, bno); - if (error) { + error = xfs_iread(mp, tp, ino, &ip, bno, + (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); + if (error) return error; - } vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); xfs_inode_lock_init(ip, vp); xfs_iocore_inode_init(ip); - if (lock_flags != 0) { + if (lock_flags) xfs_ilock(ip, lock_flags); + + if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { + xfs_idestroy(ip); + return ENOENT; } /* @@ -324,7 +396,7 @@ finish_inode: ih->ih_next = ip; ip->i_udquot = ip->i_gdquot = NULL; ih->ih_version++; - + xfs_iflags_set(ip, XFS_INEW); write_unlock(&ih->ih_lock); /* @@ -373,7 +445,10 @@ finish_inode: ip->i_chash = chlnew; chlnew->chl_ip = ip; chlnew->chl_blkno = ip->i_blkno; + if (ch->ch_list) + ch->ch_list->chl_prev = chlnew; chlnew->chl_next = ch->ch_list; + chlnew->chl_prev = NULL; ch->ch_list = chlnew; chlnew = NULL; } @@ -404,8 +479,6 @@ finish_inode: XFS_MOUNT_IUNLOCK(mp); - newnode = 1; - return_ip: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); @@ -419,7 +492,7 @@ finish_inode: * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); + bhv_vfs_init_vnode(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); return 0; } @@ -434,28 +507,26 @@ xfs_iget( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, + uint flags, uint lock_flags, xfs_inode_t **ipp, xfs_daddr_t bno) { struct inode *inode; - vnode_t *vp = NULL; + bhv_vnode_t *vp = NULL; int error; -retry: XFS_STATS_INC(xs_ig_attempts); +retry: if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { - bhv_desc_t *bdp; xfs_inode_t *ip; - int newnode; - vp = LINVFS_GET_VP(inode); + vp = vn_from_inode(inode); if (inode->i_state & I_NEW) { -inode_allocate: vn_initialize(inode); - error = xfs_iget_core(vp, mp, tp, ino, - lock_flags, ipp, bno); + error = xfs_iget_core(vp, mp, tp, ino, flags, + lock_flags, ipp, bno); if (error) { vn_mark_bad(vp); if (inode->i_state & I_NEW) @@ -463,27 +534,23 @@ inode_allocate: iput(inode); } } else { - /* These are true if the inode is in inactive or - * reclaim. The linux inode is about to go away, - * wait for that path to finish, and try again. + /* + * If the inode is not fully constructed due to + * filehandle mismatches wait for the inode to go + * away and try again. + * + * iget_locked will call __wait_on_freeing_inode + * to wait for the inode to go away. */ - if (vp->v_flag & (VINACT | VRECLM)) { - vn_wait(vp); + if (is_bad_inode(inode) || + ((ip = xfs_vtoi(vp)) == NULL)) { iput(inode); + delay(1); goto retry; } - bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); - if (bdp == NULL) { - XFS_STATS_INC(xs_ig_dup); - goto inode_allocate; - } - ip = XFS_BHVTOI(bdp); if (lock_flags != 0) xfs_ilock(ip, lock_flags); - newnode = (ip->i_d.di_mode == 0); - if (newnode) - xfs_iocore_inode_reinit(ip); XFS_STATS_INC(xs_ig_found); *ipp = ip; error = 0; @@ -500,14 +567,14 @@ inode_allocate: void xfs_inode_lock_init( xfs_inode_t *ip, - vnode_t *vp) + bhv_vnode_t *vp) { mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", (long)vp->v_number); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number); init_waitqueue_head(&ip->i_ipin_wait); atomic_set(&ip->i_pincount, 0); - init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number); + initnsema(&ip->i_flock, 1, "xfsfino"); } /* @@ -522,6 +589,7 @@ xfs_inode_incore(xfs_mount_t *mp, { xfs_ihash_t *ih; xfs_inode_t *ip; + ulong version; ih = XFS_IHASH(mp, ino); read_lock(&ih->ih_lock); @@ -529,11 +597,15 @@ xfs_inode_incore(xfs_mount_t *mp, if (ip->i_ino == ino) { /* * If we find it and tp matches, return it. + * Also move it to the front of the hash list + * if we find it and it is not already there. * Otherwise break from the loop and return * NULL. */ if (ip->i_transp == tp) { + version = ih->ih_version; read_unlock(&ih->ih_lock); + xfs_ihash_promote(ih, ip, version); return (ip); } break; @@ -555,12 +627,10 @@ void xfs_iput(xfs_inode_t *ip, uint lock_flags) { - vnode_t *vp = XFS_ITOV(ip); + bhv_vnode_t *vp = XFS_ITOV(ip); vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address); - xfs_iunlock(ip, lock_flags); - VN_RELE(vp); } @@ -571,11 +641,15 @@ void xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { - vnode_t *vp = XFS_ITOV(ip); - struct inode *inode = LINVFS_GET_IP(vp); + bhv_vnode_t *vp = XFS_ITOV(ip); + struct inode *inode = vn_to_inode(vp); vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); + if ((ip->i_d.di_mode == 0)) { + ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + vn_mark_bad(vp); + } if (inode->i_state & I_NEW) unlock_new_inode(inode); if (lock_flags) @@ -593,7 +667,7 @@ xfs_iput_new(xfs_inode_t *ip, void xfs_ireclaim(xfs_inode_t *ip) { - vnode_t *vp; + bhv_vnode_t *vp; /* * Remove from old hash list and mount list. @@ -628,9 +702,12 @@ xfs_ireclaim(xfs_inode_t *ip) vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); } + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + /* * Free all memory associated with the inode. */ + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_idestroy(ip); } @@ -656,6 +733,7 @@ xfs_iextract( iq->i_prevp = ip->i_prevp; } *ip->i_prevp = iq; + ih->ih_version++; write_unlock(&ih->ih_lock); /* @@ -673,23 +751,15 @@ xfs_iextract( ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); ASSERT(ip->i_chash != NULL); chm=NULL; - for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { - if (chl->chl_blkno == ip->i_blkno) { - if (chm == NULL) { - /* first item on the list */ - ch->ch_list = chl->chl_next; - } else { - chm->chl_next = chl->chl_next; - } - kmem_zone_free(xfs_chashlist_zone, chl); - break; - } else { - ASSERT(chl->chl_ip != ip); - chm = chl; - } - } - ASSERT_ALWAYS(chl != NULL); - } else { + chl = ip->i_chash; + if (chl->chl_prev) + chl->chl_prev->chl_next = chl->chl_next; + else + ch->ch_list = chl->chl_next; + if (chl->chl_next) + chl->chl_next->chl_prev = chl->chl_prev; + kmem_zone_free(xfs_chashlist_zone, chl); + } else { /* delete one inode from a non-empty list */ iq = ip->i_cnext; iq->i_cprev = ip->i_cprev; @@ -988,6 +1058,6 @@ xfs_iflock_nowait(xfs_inode_t *ip) void xfs_ifunlock(xfs_inode_t *ip) { - ASSERT(valusema(&(ip->i_flock)) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); vsema(&(ip->i_flock)); }