X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_inode.c;fp=fs%2Fxfs%2Fxfs_inode.c;h=0b56b0034182deab0c5b62987001ff0ca3a830c3;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=6a4089ad36d5459cf4835473b3a1fc0d5d064f1a;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6a4089ad3..0b56b0034 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -26,14 +26,12 @@ #include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" @@ -52,7 +50,7 @@ #include "xfs_mac.h" #include "xfs_acl.h" -#include +#include kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; @@ -77,16 +75,18 @@ STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); */ STATIC void xfs_validate_extents( - xfs_bmbt_rec_t *ep, + xfs_ifork_t *ifp, int nrecs, int disk, xfs_exntfmt_t fmt) { + xfs_bmbt_rec_t *ep; xfs_bmbt_irec_t irec; xfs_bmbt_rec_t rec; int i; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); rec.l0 = get_unaligned((__uint64_t*)&ep->l0); rec.l1 = get_unaligned((__uint64_t*)&ep->l1); if (disk) @@ -95,11 +95,10 @@ xfs_validate_extents( xfs_bmbt_get_all(&rec, &irec); if (fmt == XFS_EXTFMT_NOSTATE) ASSERT(irec.br_state == XFS_EXT_NORM); - ep++; } } #else /* DEBUG */ -#define xfs_validate_extents(ep, nrecs, disk, fmt) +#define xfs_validate_extents(ifp, nrecs, disk, fmt) #endif /* DEBUG */ /* @@ -160,7 +159,7 @@ xfs_inotobp( xfs_dinode_t *dip; /* - * Call the space managment code to find the location of the + * Call the space management code to find the location of the * inode on disk. */ imap.im_blkno = 0; @@ -253,15 +252,14 @@ xfs_itobp( xfs_inode_t *ip, xfs_dinode_t **dipp, xfs_buf_t **bpp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { + xfs_imap_t imap; xfs_buf_t *bp; int error; - xfs_imap_t imap; -#ifdef __KERNEL__ int i; int ni; -#endif if (ip->i_blkno == (xfs_daddr_t)0) { /* @@ -269,10 +267,9 @@ xfs_itobp( * inode on disk. */ imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); - if (error != 0) { + if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags))) return error; - } /* * If the inode number maps to a block outside the bounds @@ -319,7 +316,6 @@ xfs_itobp( */ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, (int)imap.im_len, XFS_BUF_LOCK, &bp); - if (error) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " @@ -330,16 +326,20 @@ xfs_itobp( #endif /* DEBUG */ return error; } -#ifdef __KERNEL__ + /* * Validate the magic number and version of every inode in the buffer * (if DEBUG kernel) or the first inode in the buffer, otherwise. + * No validation is done here in userspace (xfs_repair). */ -#ifdef DEBUG +#if !defined(__KERNEL__) + ni = 0; +#elif defined(DEBUG) ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; -#else +#else /* usual case */ ni = 1; #endif + for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; @@ -348,11 +348,18 @@ xfs_itobp( (i << mp->m_sb.sb_inodelog)); di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, + XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { + if (imap_flags & XFS_IMAP_BULKSTAT) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EINVAL); + } #ifdef DEBUG - prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", - mp->m_ddev_targp, + cmn_err(CE_ALERT, + "Device %s - bad inode magic/vsn " + "daddr %lld #%d (magic=%x)", + XFS_BUFTARG_NAME(mp->m_ddev_targp), (unsigned long long)imap.im_blkno, i, INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); #endif @@ -362,7 +369,6 @@ xfs_itobp( return XFS_ERROR(EFSCORRUPTED); } } -#endif /* __KERNEL__ */ xfs_inobp_check(mp, bp); @@ -505,7 +511,7 @@ xfs_iformat( switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); - size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); + size = be16_to_cpu(atp->hdr.totsize); error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: @@ -598,7 +604,6 @@ xfs_iformat_extents( xfs_bmbt_rec_t *ep, *dp; xfs_ifork_t *ifp; int nex; - int real_size; int size; int i; @@ -620,23 +625,20 @@ xfs_iformat_extents( return XFS_ERROR(EFSCORRUPTED); } - real_size = 0; + ifp->if_real_bytes = 0; if (nex == 0) ifp->if_u1.if_extents = NULL; else if (nex <= XFS_INLINE_EXTS) ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - else { - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); - real_size = size; - } + else + xfs_iext_add(ifp, 0, nex); + ifp->if_bytes = size; - ifp->if_real_bytes = real_size; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip)); - ep = ifp->if_u1.if_extents; - for (i = 0; i < nex; i++, ep++, dp++) { + xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip)); + for (i = 0; i < nex; i++, dp++) { + ep = xfs_iext_get_ext(ifp, i); ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0), ARCH_CONVERT); ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), @@ -647,7 +649,7 @@ xfs_iformat_extents( if (whichfork != XFS_DATA_FORK || XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) if (unlikely(xfs_check_nostate_extents( - ifp->if_u1.if_extents, nex))) { + ifp, 0, nex))) { XFS_ERROR_REPORT("xfs_iformat_extents(2)", XFS_ERRLEVEL_LOW, ip->i_mount); @@ -736,16 +738,16 @@ xfs_xlate_dinode_core( xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; xfs_arch_t arch = ARCH_CONVERT; uint32_t uid = 0, gid = 0; - uint16_t xid = 0; + uint16_t tag = 0; ASSERT(dir); if (dir < 0) { - xid = mem_core->di_xid; + tag = mem_core->di_tag; /* FIXME: supposed to use superblock flag */ - uid = XIDINO_UID(1, mem_core->di_uid, xid); - gid = XIDINO_GID(1, mem_core->di_gid, xid); - xid = XIDINO_XID(1, xid); + uid = TAGINO_UID(1, mem_core->di_uid, tag); + gid = TAGINO_GID(1, mem_core->di_gid, tag); + tag = TAGINO_TAG(1, tag); } INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); @@ -755,15 +757,15 @@ xfs_xlate_dinode_core( INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); INT_XLATE(buf_core->di_uid, uid, dir, arch); INT_XLATE(buf_core->di_gid, gid, dir, arch); - INT_XLATE(buf_core->di_xid, xid, dir, arch); + INT_XLATE(buf_core->di_tag, tag, dir, arch); INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); if (dir > 0) { /* FIXME: supposed to use superblock flag */ - mem_core->di_uid = INOXID_UID(1, uid, gid); - mem_core->di_gid = INOXID_GID(1, uid, gid); - mem_core->di_xid = INOXID_XID(1, uid, gid, xid); + mem_core->di_uid = INOTAG_UID(1, uid, gid); + mem_core->di_gid = INOTAG_GID(1, uid, gid); + mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag); memcpy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); } else { @@ -800,7 +802,6 @@ xfs_xlate_dinode_core( STATIC uint _xfs_dic2xflags( - xfs_dinode_core_t *dic, __uint16_t di_flags) { uint flags = 0; @@ -834,6 +835,8 @@ _xfs_dic2xflags( flags |= XFS_XFLAG_EXTSIZE; if (di_flags & XFS_DIFLAG_EXTSZINHERIT) flags |= XFS_XFLAG_EXTSZINHERIT; + if (di_flags & XFS_DIFLAG_NODEFRAG) + flags |= XFS_XFLAG_NODEFRAG; } return flags; @@ -845,21 +848,21 @@ xfs_ip2xflags( { xfs_dinode_core_t *dic = &ip->i_d; - return _xfs_dic2xflags(dic, dic->di_flags) | - (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(dic->di_flags) | + (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); } uint xfs_dic2xflags( xfs_dinode_core_t *dic) { - return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | - (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); + return _xfs_dic2xflags(INT_GET(dic->di_flags, ARCH_CONVERT)) | + (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); } /* * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode coresponding to the given + * to a newly allocated in-core inode corresponding to the given * inode number. * * Initialize the inode's attributes and extent pointers if it @@ -871,7 +874,8 @@ xfs_iread( xfs_trans_t *tp, xfs_ino_t ino, xfs_inode_t **ipp, - xfs_daddr_t bno) + xfs_daddr_t bno, + uint imap_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; @@ -883,6 +887,7 @@ xfs_iread( ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -891,9 +896,8 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); - - if (error != 0) { + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); + if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; } @@ -1035,6 +1039,7 @@ xfs_iread_extents( { int error; xfs_ifork_t *ifp; + xfs_extnum_t nextents; size_t size; if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { @@ -1042,26 +1047,24 @@ xfs_iread_extents( ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } - size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); + nextents = XFS_IFORK_NEXTENTS(ip, whichfork); + size = nextents * sizeof(xfs_bmbt_rec_t); ifp = XFS_IFORK_PTR(ip, whichfork); + /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); - ASSERT(ifp->if_u1.if_extents != NULL); ifp->if_lastex = NULLEXTNUM; - ifp->if_bytes = ifp->if_real_bytes = (int)size; + ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; + xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { - kmem_free(ifp->if_u1.if_extents, size); - ifp->if_u1.if_extents = NULL; - ifp->if_bytes = ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); ifp->if_flags &= ~XFS_IFEXTENTS; return error; } - xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents, - XFS_IFORK_NEXTENTS(ip, whichfork), 0, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip)); return 0; } @@ -1107,7 +1110,7 @@ xfs_ialloc( { xfs_ino_t ino; xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; uint flags; int error; @@ -1132,7 +1135,7 @@ xfs_ialloc( * to prevent others from looking at until we're done. */ error = xfs_trans_iget(tp->t_mountp, tp, ino, - IGET_CREATE, XFS_ILOCK_EXCL, &ip); + XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error != 0) { return error; } @@ -1145,7 +1148,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(cr); ip->i_d.di_gid = current_fsgid(cr); - ip->i_d.di_xid = current_fsxid(cr, vp); + ip->i_d.di_tag = current_fstag(cr, vp); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); @@ -1246,6 +1249,9 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NOSYMLINKS; if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && + xfs_inherit_nodefrag) + di_flags |= XFS_DIFLAG_NODEFRAG; ip->i_d.di_flags |= di_flags; } /* FALLTHROUGH */ @@ -1269,8 +1275,8 @@ xfs_ialloc( */ xfs_trans_log_inode(tp, ip, flags); - /* now that we have an i_mode we can set Linux inode ops (& unlock) */ - VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); + /* now that we have an i_mode we can setup inode ops and unlock */ + bhv_vfs_init_vnode(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); *ipp = ip; return 0; @@ -1310,7 +1316,7 @@ xfs_isize_check( (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, - NULL)) + NULL, NULL)) return; ASSERT(nimaps == 1); ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); @@ -1397,10 +1403,10 @@ xfs_itrunc_trace( (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), (void*)(unsigned long)(toss_finish & 0xffffffff), (void*)(unsigned long)current_cpu(), - (void*)0, - (void*)0, - (void*)0, - (void*)0); + (void*)(unsigned long)current_pid(), + (void*)NULL, + (void*)NULL, + (void*)NULL); } #else #define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) @@ -1418,6 +1424,16 @@ xfs_itrunc_trace( * calling into the buffer/page cache code and we can't hold the * inode lock when we do so. * + * We need to wait for any direct I/Os in flight to complete before we + * proceed with the truncate. This is needed to prevent the extents + * being read or written by the direct I/Os from being removed while the + * I/O is in flight as there is no other method of synchronising + * direct I/O with the truncate operation. Also, because we hold + * the IOLOCK in exclusive mode, we prevent new direct I/Os from being + * started until the truncate completes and drops the lock. Essentially, + * the vn_iowait() call forms an I/O barrier that provides strict ordering + * between direct I/Os and the truncate operation. + * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used * in the case that the caller is locking things out of order and @@ -1436,7 +1452,7 @@ xfs_itruncate_start( xfs_fsize_t last_byte; xfs_off_t toss_start; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); @@ -1445,10 +1461,13 @@ xfs_itruncate_start( mp = ip->i_mount; vp = XFS_ITOV(ip); + + vn_iowait(vp); /* wait for the completion of any pending DIOs */ + /* - * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers + * Call toss_pages or flushinval_pages to get rid of pages * overlapping the region being removed. We have to use - * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the + * the less efficient flushinval_pages in the case that the * caller may not be able to finish the truncate without * dropping the inode's I/O lock. Make sure * to catch any pages brought in by buffers overlapping @@ -1457,10 +1476,10 @@ xfs_itruncate_start( * so that we don't toss things on the same block as * new_size but before it. * - * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to + * Before calling toss_page or flushinval_pages, make sure to * call remapf() over the same region if the file is mapped. * This frees up mapped file references to the pages in the - * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures + * given range and for the flushinval_pages case it ensures * that we get the latest mapped changes flushed out. */ toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); @@ -1478,9 +1497,9 @@ xfs_itruncate_start( last_byte); if (last_byte > toss_start) { if (flags & XFS_ITRUNC_DEFINITE) { - VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_toss_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } else { - VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); + bhv_vop_flushinval_pages(vp, toss_start, -1, FI_REMAPF_LOCKED); } } @@ -1678,12 +1697,13 @@ xfs_itruncate_finish( * runs. */ XFS_BMAP_INIT(&free_list, &first_block); - error = xfs_bunmapi(ntp, ip, first_unmap_block, - unmap_len, + error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, + first_unmap_block, unmap_len, XFS_BMAPI_AFLAG(fork) | (sync ? 0 : XFS_BMAPI_ASYNC), XFS_ITRUNC_MAX_EXTENTS, - &first_block, &free_list, &done); + &first_block, &free_list, + NULL, &done); if (error) { /* * If the bunmapi call encounters an error, @@ -1920,7 +1940,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { return error; } @@ -1967,9 +1987,9 @@ xfs_iunlink_remove( xfs_agino_t agino; xfs_agino_t next_agino; xfs_buf_t *last_ibp; - xfs_dinode_t *last_dip; + xfs_dinode_t *last_dip = NULL; short bucket_index; - int offset, last_offset; + int offset, last_offset = 0; int error; int agi_ok; @@ -2029,7 +2049,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2091,7 +2111,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2194,7 +2214,7 @@ xfs_ifree_cluster( /* Inode not in memory or we found it already, * nothing to do */ - if (!ip || (ip->i_flags & XFS_ISTALE)) { + if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { read_unlock(&ih->ih_lock); continue; } @@ -2216,8 +2236,7 @@ xfs_ifree_cluster( if (ip == free_ip) { if (xfs_iflock_nowait(ip)) { - ip->i_flags |= XFS_ISTALE; - + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); } else { @@ -2230,7 +2249,7 @@ xfs_ifree_cluster( if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { if (xfs_iflock_nowait(ip)) { - ip->i_flags |= XFS_ISTALE; + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2260,7 +2279,7 @@ xfs_ifree_cluster( AIL_LOCK(mp,s); iip->ili_flush_lsn = iip->ili_item.li_lsn; AIL_UNLOCK(mp, s); - iip->ili_inode->i_flags |= XFS_ISTALE; + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } lip = lip->li_bio_list; @@ -2496,92 +2515,6 @@ xfs_iroot_realloc( } -/* - * This is called when the amount of space needed for if_extents - * is increased or decreased. The change in size is indicated by - * the number of extents that need to be added or deleted in the - * ext_diff parameter. - * - * If the amount of space needed has decreased below the size of the - * inline buffer, then switch to using the inline buffer. Otherwise, - * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer - * to what is needed. - * - * ip -- the inode whose if_extents area is changing - * ext_diff -- the change in the number of extents, positive or negative, - * requested for the if_extents array. - */ -void -xfs_iext_realloc( - xfs_inode_t *ip, - int ext_diff, - int whichfork) -{ - int byte_diff; - xfs_ifork_t *ifp; - int new_size; - uint rnew_size; - - if (ext_diff == 0) { - return; - } - - ifp = XFS_IFORK_PTR(ip, whichfork); - byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t); - new_size = (int)ifp->if_bytes + byte_diff; - ASSERT(new_size >= 0); - - if (new_size == 0) { - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - } - ifp->if_u1.if_extents = NULL; - rnew_size = 0; - } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) { - /* - * If the valid extents can fit in if_inline_ext, - * copy them from the malloc'd vector and free it. - */ - if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { - /* - * For now, empty files are format EXTENTS, - * so the if_extents pointer is null. - */ - if (ifp->if_u1.if_extents) { - memcpy(ifp->if_u2.if_inline_ext, - ifp->if_u1.if_extents, new_size); - kmem_free(ifp->if_u1.if_extents, - ifp->if_real_bytes); - } - ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; - } - rnew_size = 0; - } else { - rnew_size = new_size; - if ((rnew_size & (rnew_size - 1)) != 0) - rnew_size = xfs_iroundup(rnew_size); - /* - * Stuck with malloc/realloc. - */ - if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_alloc(rnew_size, KM_SLEEP); - memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, - sizeof(ifp->if_u2.if_inline_ext)); - } else if (rnew_size != ifp->if_real_bytes) { - ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) - kmem_realloc(ifp->if_u1.if_extents, - rnew_size, - ifp->if_real_bytes, - KM_NOFS); - } - } - ifp->if_real_bytes = rnew_size; - ifp->if_bytes = new_size; -} - - /* * This is called when the amount of space needed for if_data * is increased or decreased. The change in size is indicated by @@ -2741,12 +2674,11 @@ xfs_idestroy_fork( ifp->if_real_bytes = 0; } } else if ((ifp->if_flags & XFS_IFEXTENTS) && - (ifp->if_u1.if_extents != NULL) && - (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { + ((ifp->if_flags & XFS_IFEXTIREC) || + ((ifp->if_u1.if_extents != NULL) && + (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); - ifp->if_u1.if_extents = NULL; - ifp->if_real_bytes = 0; + xfs_iext_destroy(ifp); } ASSERT(ifp->if_u1.if_extents == NULL || ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); @@ -2822,7 +2754,7 @@ xfs_ipin( /* * Decrement the pin count of the given inode, and wake up * anyone in xfs_iwait_unpin() if the count goes to 0. The - * inode must have been previoulsy pinned with a call to xfs_ipin(). + * inode must have been previously pinned with a call to xfs_ipin(). */ void xfs_iunpin( @@ -2830,17 +2762,38 @@ xfs_iunpin( { ASSERT(atomic_read(&ip->i_pincount) > 0); - if (atomic_dec_and_test(&ip->i_pincount)) { - vnode_t *vp = XFS_ITOV_NULL(ip); + if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { + + /* + * If the inode is currently being reclaimed, the link between + * the bhv_vnode and the xfs_inode will be broken after the + * XFS_IRECLAIM* flag is set. Hence, if these flags are not + * set, then we can move forward and mark the linux inode dirty + * knowing that it is still valid as it won't freed until after + * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The + * i_flags_lock is used to synchronise the setting of the + * XFS_IRECLAIM* flags and the breaking of the link, and so we + * can execute atomically w.r.t to reclaim by holding this lock + * here. + * + * However, we still need to issue the unpin wakeup call as the + * inode reclaim may be blocked waiting for the inode to become + * unpinned. + */ + + if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *inode = NULL; - /* make sync come back and flush this inode */ - if (vp) { - struct inode *inode = LINVFS_GET_IP(vp); + BUG_ON(vp == NULL); + inode = vn_to_inode(vp); + BUG_ON(inode->i_state & I_CLEAR); - if (!(inode->i_state & I_NEW)) + /* make sync come back and flush this inode */ + if (!(inode->i_state & (I_NEW|I_FREEING))) mark_inode_dirty_sync(inode); } - + spin_unlock(&ip->i_flags_lock); wake_up(&ip->i_ipin_wait); } } @@ -2923,16 +2876,15 @@ xfs_iextents_copy( * the delayed ones. There must be at least one * non-delayed extent. */ - ep = ifp->if_u1.if_extents; dest_ep = buffer; copied = 0; for (i = 0; i < nrecs; i++) { + ep = xfs_iext_get_ext(ifp, i); start_block = xfs_bmbt_get_startblock(ep); if (ISNULLSTARTBLOCK(start_block)) { /* * It's a delayed allocation extent, so skip it. */ - ep++; continue; } @@ -2942,11 +2894,10 @@ xfs_iextents_copy( put_unaligned(INT_GET(ep->l1, ARCH_CONVERT), (__uint64_t*)&dest_ep->l1); dest_ep++; - ep++; copied++; } ASSERT(copied != 0); - xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip)); + xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip)); return (copied * (uint)sizeof(xfs_bmbt_rec_t)); } @@ -3004,20 +2955,15 @@ xfs_iflush_fork( ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); } - if (whichfork == XFS_DATA_FORK) { - if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) { - XFS_ERROR_REPORT("xfs_iflush_fork", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } - } break; case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0)); - ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || + (ifp->if_bytes == 0)); + ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || + (ifp->if_bytes > 0)); if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); @@ -3092,7 +3038,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3135,8 +3081,8 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0); - if (error != 0) { + error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); + if (error) { xfs_ifunlock(ip); return error; } @@ -3285,7 +3231,7 @@ xfs_iflush( corrupt_out: xfs_buf_relse(bp); - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_iflush_abort(ip); /* * Unlocks the flush lock @@ -3307,7 +3253,7 @@ cluster_corrupt_out: xfs_buf_relse(bp); } - xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); if(!bufwasdelwri) { /* @@ -3350,7 +3296,7 @@ xfs_iflush_int( SPLDECL(s); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); - ASSERT(valusema(&ip->i_flock) <= 0); + ASSERT(issemalocked(&(ip->i_flock))); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3590,7 +3536,7 @@ xfs_iflush_all( xfs_mount_t *mp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; again: XFS_MOUNT_ILOCK(mp); @@ -3631,7 +3577,7 @@ xfs_iaccess( { int error; mode_t orgmode = mode; - struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + struct inode *inode = vn_to_inode(XFS_ITOV(ip)); if (mode & S_IWUSR) { umode_t imode = inode->i_mode; @@ -3725,3 +3671,1100 @@ xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); } #endif + +/* + * Return a pointer to the extent record at file index idx. + */ +xfs_bmbt_rec_t * +xfs_iext_get_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx) /* index of target extent */ +{ + ASSERT(idx >= 0); + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { + return ifp->if_u1.if_ext_irec->er_extbuf; + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_ext_irec_t *erp; /* irec pointer */ + int erp_idx = 0; /* irec index */ + xfs_extnum_t page_idx = idx; /* ext index in target list */ + + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + return &erp->er_extbuf[page_idx]; + } else if (ifp->if_bytes) { + return &ifp->if_u1.if_extents[idx]; + } else { + return NULL; + } +} + +/* + * Insert new item(s) into the extent records for incore inode + * fork 'ifp'. 'count' new items are inserted at index 'idx'. + */ +void +xfs_iext_insert( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* starting index of new items */ + xfs_extnum_t count, /* number of inserted items */ + xfs_bmbt_irec_t *new) /* items to insert */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t i; /* extent record index */ + + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_iext_add(ifp, idx, count); + for (i = idx; i < idx + count; i++, new++) { + ep = xfs_iext_get_ext(ifp, i); + xfs_bmbt_set_all(ep, new); + } +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be increased. The ext_diff parameter stores the + * number of new extents being added and the idx parameter contains + * the extent index where the new extents will be added. If the new + * extents are being appended, then we just need to (re)allocate and + * initialize the space. Otherwise, if the new extents are being + * inserted into the middle of the existing entries, a bit more work + * is required to make room for the new extents to be inserted. The + * caller is responsible for filling in the new extent entries upon + * return. + */ +void +xfs_iext_add( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin adding exts */ + int ext_diff) /* number of extents to add */ +{ + int byte_diff; /* new bytes being added */ + int new_size; /* size of extents after adding */ + xfs_extnum_t nextents; /* number of extents in file */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT((idx >= 0) && (idx <= nextents)); + byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); + new_size = ifp->if_bytes + byte_diff; + /* + * If the new number of extents (nextents + ext_diff) + * fits inside the inode, then continue to use the inline + * extent buffer. + */ + if (nextents + ext_diff <= XFS_INLINE_EXTS) { + if (idx < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], + &ifp->if_u2.if_inline_ext[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); + } + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; + ifp->if_lastex = nextents + ext_diff; + } + /* + * Otherwise use a linear (direct) extent list. + * If the extents are currently inside the inode, + * xfs_iext_realloc_direct will switch us from + * inline to direct extent allocation mode. + */ + else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, new_size); + if (idx < nextents) { + memmove(&ifp->if_u1.if_extents[idx + ext_diff], + &ifp->if_u1.if_extents[idx], + (nextents - idx) * sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); + } + } + /* Indirection array */ + else { + xfs_ext_irec_t *erp; + int erp_idx = 0; + int page_idx = idx; + + ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); + if (ifp->if_flags & XFS_IFEXTIREC) { + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); + } else { + xfs_iext_irec_init(ifp); + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = ifp->if_u1.if_ext_irec; + } + /* Extents fit in target extent page */ + if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { + if (page_idx < erp->er_extcount) { + memmove(&erp->er_extbuf[page_idx + ext_diff], + &erp->er_extbuf[page_idx], + (erp->er_extcount - page_idx) * + sizeof(xfs_bmbt_rec_t)); + memset(&erp->er_extbuf[page_idx], 0, byte_diff); + } + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + } + /* Insert a new extent page */ + else if (erp) { + xfs_iext_add_indirect_multi(ifp, + erp_idx, page_idx, ext_diff); + } + /* + * If extent(s) are being appended to the last page in + * the indirection array and the new extent(s) don't fit + * in the page, then erp is NULL and erp_idx is set to + * the next index needed in the indirection array. + */ + else { + int count = ext_diff; + + while (count) { + erp = xfs_iext_irec_new(ifp, erp_idx); + erp->er_extcount = count; + count -= MIN(count, (int)XFS_LINEAR_EXTS); + if (count) { + erp_idx++; + } + } + } + } + ifp->if_bytes = new_size; +} + +/* + * This is called when incore extents are being added to the indirection + * array and the new extents do not fit in the target extent list. The + * erp_idx parameter contains the irec index for the target extent list + * in the indirection array, and the idx parameter contains the extent + * index within the list. The number of extents being added is stored + * in the count parameter. + * + * |-------| |-------| + * | | | | idx - number of extents before idx + * | idx | | count | + * | | | | count - number of extents being inserted at idx + * |-------| |-------| + * | count | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_add_indirect_multi( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* target extent irec index */ + xfs_extnum_t idx, /* index within target list */ + int count) /* new extents being added */ +{ + int byte_diff; /* new bytes being added */ + xfs_ext_irec_t *erp; /* pointer to irec entry */ + xfs_extnum_t ext_diff; /* number of extents to add */ + xfs_extnum_t ext_cnt; /* new extents still needed */ + xfs_extnum_t nex2; /* extents after idx + count */ + xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ + int nlists; /* number of irec's (lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex2 = erp->er_extcount - idx; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* + * Save second part of target extent list + * (all extents past */ + if (nex2) { + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); + memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); + erp->er_extcount -= nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); + memset(&erp->er_extbuf[idx], 0, byte_diff); + } + + /* + * Add the new extents to the end of the target + * list, then allocate new irec record(s) and + * extent buffer(s) as needed to store the rest + * of the new extents. + */ + ext_cnt = count; + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); + if (ext_diff) { + erp->er_extcount += ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + while (ext_cnt) { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); + erp->er_extcount = ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); + ext_cnt -= ext_diff; + } + + /* Add nex2 extents back to indirection array */ + if (nex2) { + xfs_extnum_t ext_avail; + int i; + + byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + i = 0; + /* + * If nex2 extents fit in the current page, append + * nex2_ep after the new extents. + */ + if (nex2 <= ext_avail) { + i = erp->er_extcount; + } + /* + * Otherwise, check if space is available in the + * next page. + */ + else if ((erp_idx < nlists - 1) && + (nex2 <= (ext_avail = XFS_LINEAR_EXTS - + ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { + erp_idx++; + erp++; + /* Create a hole for nex2 extents */ + memmove(&erp->er_extbuf[nex2], erp->er_extbuf, + erp->er_extcount * sizeof(xfs_bmbt_rec_t)); + } + /* + * Final choice, create a new extent page for + * nex2 extents. + */ + else { + erp_idx++; + erp = xfs_iext_irec_new(ifp, erp_idx); + } + memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); + kmem_free(nex2_ep, byte_diff); + erp->er_extcount += nex2; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); + } +} + +/* + * This is called when the amount of space required for incore file + * extents needs to be decreased. The ext_diff parameter stores the + * number of extents to be removed and the idx parameter contains + * the extent index where the extents will be removed from. + * + * If the amount of space needed has decreased below the linear + * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous + * extent array. Otherwise, use kmem_realloc() to adjust the + * size to what is needed. + */ +void +xfs_iext_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + ASSERT(ext_diff > 0); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else if (ifp->if_flags & XFS_IFEXTIREC) { + xfs_iext_remove_indirect(ifp, idx, ext_diff); + } else if (ifp->if_real_bytes) { + xfs_iext_remove_direct(ifp, idx, ext_diff); + } else { + xfs_iext_remove_inline(ifp, idx, ext_diff); + } + ifp->if_bytes = new_size; +} + +/* + * This removes ext_diff extents from the inline buffer, beginning + * at extent index idx. + */ +void +xfs_iext_remove_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + int nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + ASSERT(idx < XFS_INLINE_EXTS); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(((nextents - ext_diff) > 0) && + (nextents - ext_diff) < XFS_INLINE_EXTS); + + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u2.if_inline_ext[idx], + &ifp->if_u2.if_inline_ext[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + } else { + memset(&ifp->if_u2.if_inline_ext[idx], 0, + ext_diff * sizeof(xfs_bmbt_rec_t)); + } +} + +/* + * This removes ext_diff extents from a linear (direct) extent list, + * beginning at extent index idx. If the extents are being removed + * from the end of the list (ie. truncate) then we just need to re- + * allocate the list to remove the extra space. Otherwise, if the + * extents are being removed from the middle of the existing extent + * entries, then we first need to move the extent records beginning + * at idx + ext_diff up in the list to overwrite the records being + * removed, then remove the extra space via kmem_realloc. + */ +void +xfs_iext_remove_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing exts */ + int ext_diff) /* number of extents to remove */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int new_size; /* size of extents after removal */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + new_size = ifp->if_bytes - + (ext_diff * sizeof(xfs_bmbt_rec_t)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (new_size == 0) { + xfs_iext_destroy(ifp); + return; + } + /* Move extents up in the list (if needed) */ + if (idx + ext_diff < nextents) { + memmove(&ifp->if_u1.if_extents[idx], + &ifp->if_u1.if_extents[idx + ext_diff], + (nextents - (idx + ext_diff)) * + sizeof(xfs_bmbt_rec_t)); + } + memset(&ifp->if_u1.if_extents[nextents - ext_diff], + 0, ext_diff * sizeof(xfs_bmbt_rec_t)); + /* + * Reallocate the direct extent list. If the extents + * will fit inside the inode then xfs_iext_realloc_direct + * will switch from direct to inline extent allocation + * mode for us. + */ + xfs_iext_realloc_direct(ifp, new_size); + ifp->if_bytes = new_size; +} + +/* + * This is called when incore extents are being removed from the + * indirection array and the extents being removed span multiple extent + * buffers. The idx parameter contains the file extent index where we + * want to begin removing extents, and the count parameter contains + * how many extents need to be removed. + * + * |-------| |-------| + * | nex1 | | | nex1 - number of extents before idx + * |-------| | count | + * | | | | count - number of extents being removed at idx + * | count | |-------| + * | | | nex2 | nex2 - number of extents after idx + count + * |-------| |-------| + */ +void +xfs_iext_remove_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t idx, /* index to begin removing extents */ + int count) /* number of extents to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int erp_idx = 0; /* indirection array index */ + xfs_extnum_t ext_cnt; /* extents left to remove */ + xfs_extnum_t ext_diff; /* extents to remove in current list */ + xfs_extnum_t nex1; /* number of extents before idx */ + xfs_extnum_t nex2; /* extents after idx + count */ + int nlists; /* entries in indirection array */ + int page_idx = idx; /* index in target extent list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); + ASSERT(erp != NULL); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nex1 = page_idx; + ext_cnt = count; + while (ext_cnt) { + nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); + ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); + /* + * Check for deletion of entire list; + * xfs_iext_irec_remove() updates extent offsets. + */ + if (ext_diff == erp->er_extcount) { + xfs_iext_irec_remove(ifp, erp_idx); + ext_cnt -= ext_diff; + nex1 = 0; + if (ext_cnt) { + ASSERT(erp_idx < ifp->if_real_bytes / + XFS_IEXT_BUFSZ); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nex1 = 0; + continue; + } else { + break; + } + } + /* Move extents up (if needed) */ + if (nex2) { + memmove(&erp->er_extbuf[nex1], + &erp->er_extbuf[nex1 + ext_diff], + nex2 * sizeof(xfs_bmbt_rec_t)); + } + /* Zero out rest of page */ + memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - + ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); + /* Update remaining counters */ + erp->er_extcount -= ext_diff; + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); + ext_cnt -= ext_diff; + nex1 = 0; + erp_idx++; + erp++; + } + ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); + xfs_iext_irec_compact(ifp); +} + +/* + * Create, destroy, or resize a linear (direct) block of extents. + */ +void +xfs_iext_realloc_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new size of extents */ +{ + int rnew_size; /* real new size of extents */ + + rnew_size = new_size; + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || + ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && + (new_size != ifp->if_real_bytes))); + + /* Free extent records */ + if (new_size == 0) { + xfs_iext_destroy(ifp); + } + /* Resize direct extent list and zero any new bytes */ + else if (ifp->if_real_bytes) { + /* Check if extents will fit inside the inode */ + if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { + xfs_iext_direct_to_inline(ifp, new_size / + (uint)sizeof(xfs_bmbt_rec_t)); + ifp->if_bytes = new_size; + return; + } + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + if (rnew_size != ifp->if_real_bytes) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_realloc(ifp->if_u1.if_extents, + rnew_size, + ifp->if_real_bytes, + KM_SLEEP); + } + if (rnew_size > ifp->if_real_bytes) { + memset(&ifp->if_u1.if_extents[ifp->if_bytes / + (uint)sizeof(xfs_bmbt_rec_t)], 0, + rnew_size - ifp->if_real_bytes); + } + } + /* + * Switch from the inline extent buffer to a direct + * extent list. Be sure to include the inline extent + * bytes in new_size. + */ + else { + new_size += ifp->if_bytes; + if ((new_size & (new_size - 1)) != 0) { + rnew_size = xfs_iroundup(new_size); + } + xfs_iext_inline_to_direct(ifp, rnew_size); + } + ifp->if_real_bytes = rnew_size; + ifp->if_bytes = new_size; +} + +/* + * Switch from linear (direct) extent records to inline buffer. + */ +void +xfs_iext_direct_to_inline( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t nextents) /* number of extents in file */ +{ + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(nextents <= XFS_INLINE_EXTS); + /* + * The inline buffer was zeroed when we switched + * from inline to direct extent allocation mode, + * so we don't need to clear it here. + */ + memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, + nextents * sizeof(xfs_bmbt_rec_t)); + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + ifp->if_real_bytes = 0; +} + +/* + * Switch from inline buffer to linear (direct) extent records. + * new_size should already be rounded up to the next power of 2 + * by the caller (when appropriate), so use new_size as it is. + * However, since new_size may be rounded up, we can't update + * if_bytes here. It is the caller's responsibility to update + * if_bytes upon return. + */ +void +xfs_iext_inline_to_direct( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* number of extents in file */ +{ + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(new_size, KM_SLEEP); + memset(ifp->if_u1.if_extents, 0, new_size); + if (ifp->if_bytes) { + memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, + ifp->if_bytes); + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_real_bytes = new_size; +} + +/* + * Resize an extent indirection array to new_size bytes. + */ +void +xfs_iext_realloc_indirect( + xfs_ifork_t *ifp, /* inode fork pointer */ + int new_size) /* new indirection array size */ +{ + int nlists; /* number of irec's (ex lists) */ + int size; /* current indirection array size */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + size = nlists * sizeof(xfs_ext_irec_t); + ASSERT(ifp->if_real_bytes); + ASSERT((new_size >= 0) && (new_size != size)); + if (new_size == 0) { + xfs_iext_destroy(ifp); + } else { + ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) + kmem_realloc(ifp->if_u1.if_ext_irec, + new_size, size, KM_SLEEP); + } +} + +/* + * Switch from indirection array to linear (direct) extent allocations. + */ +void +xfs_iext_indirect_to_direct( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep; /* extent record pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + int size; /* size of file extents */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + size = nextents * sizeof(xfs_bmbt_rec_t); + + xfs_iext_irec_compact_full(ifp); + ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); + + ep = ifp->if_u1.if_ext_irec->er_extbuf; + kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); + ifp->if_flags &= ~XFS_IFEXTIREC; + ifp->if_u1.if_extents = ep; + ifp->if_bytes = size; + if (nextents < XFS_LINEAR_EXTS) { + xfs_iext_realloc_direct(ifp, size); + } +} + +/* + * Free incore file extents. + */ +void +xfs_iext_destroy( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + if (ifp->if_flags & XFS_IFEXTIREC) { + int erp_idx; + int nlists; + + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { + xfs_iext_irec_remove(ifp, erp_idx); + } + ifp->if_flags &= ~XFS_IFEXTIREC; + } else if (ifp->if_real_bytes) { + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + } else if (ifp->if_bytes) { + memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * + sizeof(xfs_bmbt_rec_t)); + } + ifp->if_u1.if_extents = NULL; + ifp->if_real_bytes = 0; + ifp->if_bytes = 0; +} + +/* + * Return a pointer to the extent record for file system block bno. + */ +xfs_bmbt_rec_t * /* pointer to found extent record */ +xfs_iext_bno_to_ext( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + xfs_extnum_t *idxp) /* index of target extent */ +{ + xfs_bmbt_rec_t *base; /* pointer to first extent */ + xfs_filblks_t blockcount = 0; /* number of blocks in extent */ + xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + int high; /* upper boundary in search */ + xfs_extnum_t idx = 0; /* index of target extent */ + int low; /* lower boundary in search */ + xfs_extnum_t nextents; /* number of file extents */ + xfs_fileoff_t startoff = 0; /* start offset of extent */ + + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *idxp = 0; + return NULL; + } + low = 0; + if (ifp->if_flags & XFS_IFEXTIREC) { + /* Find target extent list */ + int erp_idx = 0; + erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); + base = erp->er_extbuf; + high = erp->er_extcount - 1; + } else { + base = ifp->if_u1.if_extents; + high = nextents - 1; + } + /* Binary search extent records */ + while (low <= high) { + idx = (low + high) >> 1; + ep = base + idx; + startoff = xfs_bmbt_get_startoff(ep); + blockcount = xfs_bmbt_get_blockcount(ep); + if (bno < startoff) { + high = idx - 1; + } else if (bno >= startoff + blockcount) { + low = idx + 1; + } else { + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + *idxp = idx; + return ep; + } + } + /* Convert back to file-based extent index */ + if (ifp->if_flags & XFS_IFEXTIREC) { + idx += erp->er_extoff; + } + if (bno >= startoff + blockcount) { + if (++idx == nextents) { + ep = NULL; + } else { + ep = xfs_iext_get_ext(ifp, idx); + } + } + *idxp = idx; + return ep; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record for filesystem block bno. Store the index of the + * target irec in *erp_idxp. + */ +xfs_ext_irec_t * /* pointer to found extent record */ +xfs_iext_bno_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_fileoff_t bno, /* block number to search for */ + int *erp_idxp) /* irec index of target ext list */ +{ + xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ + xfs_ext_irec_t *erp_next; /* next indirection array entry */ + int erp_idx; /* indirection array index */ + int nlists; /* number of extent irec's (lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; + if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { + high = erp_idx - 1; + } else if (erp_next && bno >= + xfs_bmbt_get_startoff(erp_next->er_extbuf)) { + low = erp_idx + 1; + } else { + break; + } + } + *erp_idxp = erp_idx; + return erp; +} + +/* + * Return a pointer to the indirection array entry containing the + * extent record at file extent index *idxp. Store the index of the + * target irec in *erp_idxp and store the page index of the target + * extent record in *idxp. + */ +xfs_ext_irec_t * +xfs_iext_idx_to_irec( + xfs_ifork_t *ifp, /* inode fork pointer */ + xfs_extnum_t *idxp, /* extent index (file -> page) */ + int *erp_idxp, /* pointer to target irec */ + int realloc) /* new bytes were just added */ +{ + xfs_ext_irec_t *prev; /* pointer to previous irec */ + xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ + int erp_idx; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + int high; /* binary search upper limit */ + int low; /* binary search lower limit */ + xfs_extnum_t page_idx = *idxp; /* extent index in target list */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + ASSERT(page_idx >= 0 && page_idx <= + ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp_idx = 0; + low = 0; + high = nlists - 1; + + /* Binary search extent irec's */ + while (low <= high) { + erp_idx = (low + high) >> 1; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + prev = erp_idx > 0 ? erp - 1 : NULL; + if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && + realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { + high = erp_idx - 1; + } else if (page_idx > erp->er_extoff + erp->er_extcount || + (page_idx == erp->er_extoff + erp->er_extcount && + !realloc)) { + low = erp_idx + 1; + } else if (page_idx == erp->er_extoff + erp->er_extcount && + erp->er_extcount == XFS_LINEAR_EXTS) { + ASSERT(realloc); + page_idx = 0; + erp_idx++; + erp = erp_idx < nlists ? erp + 1 : NULL; + break; + } else { + page_idx -= erp->er_extoff; + break; + } + } + *idxp = page_idx; + *erp_idxp = erp_idx; + return(erp); +} + +/* + * Allocate and initialize an indirection array once the space needed + * for incore extents increases above XFS_IEXT_BUFSZ. + */ +void +xfs_iext_irec_init( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + xfs_extnum_t nextents; /* number of extents in file */ + + ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(nextents <= XFS_LINEAR_EXTS); + + erp = (xfs_ext_irec_t *) + kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); + + if (nextents == 0) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + } else if (!ifp->if_real_bytes) { + xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); + } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { + xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); + } + erp->er_extbuf = ifp->if_u1.if_extents; + erp->er_extcount = nextents; + erp->er_extoff = 0; + + ifp->if_flags |= XFS_IFEXTIREC; + ifp->if_real_bytes = XFS_IEXT_BUFSZ; + ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); + ifp->if_u1.if_ext_irec = erp; + + return; +} + +/* + * Allocate and initialize a new entry in the indirection array. + */ +xfs_ext_irec_t * +xfs_iext_irec_new( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* index for new irec */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* Resize indirection array */ + xfs_iext_realloc_indirect(ifp, ++nlists * + sizeof(xfs_ext_irec_t)); + /* + * Move records down in the array so the + * new page can use erp_idx. + */ + erp = ifp->if_u1.if_ext_irec; + for (i = nlists - 1; i > erp_idx; i--) { + memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); + } + ASSERT(i == erp_idx); + + /* Initialize new extent record */ + erp = ifp->if_u1.if_ext_irec; + erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *) + kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; + memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); + erp[erp_idx].er_extcount = 0; + erp[erp_idx].er_extoff = erp_idx > 0 ? + erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; + return (&erp[erp_idx]); +} + +/* + * Remove a record from the indirection array. + */ +void +xfs_iext_irec_remove( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx) /* irec index to remove */ +{ + xfs_ext_irec_t *erp; /* indirection array pointer */ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + if (erp->er_extbuf) { + xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, + -erp->er_extcount); + kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); + } + /* Compact extent records */ + erp = ifp->if_u1.if_ext_irec; + for (i = erp_idx; i < nlists - 1; i++) { + memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); + } + /* + * Manually free the last extent record from the indirection + * array. A call to xfs_iext_realloc_indirect() with a size + * of zero would result in a call to xfs_iext_destroy() which + * would in turn call this function again, creating a nasty + * infinite loop. + */ + if (--nlists) { + xfs_iext_realloc_indirect(ifp, + nlists * sizeof(xfs_ext_irec_t)); + } else { + kmem_free(ifp->if_u1.if_ext_irec, + sizeof(xfs_ext_irec_t)); + } + ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; +} + +/* + * This is called to clean up large amounts of unused memory allocated + * by the indirection array. Before compacting anything though, verify + * that the indirection array is still needed and switch back to the + * linear extent list (or even the inline buffer) if possible. The + * compaction policy is as follows: + * + * Full Compaction: Extents fit into a single page (or inline buffer) + * Full Compaction: Extents occupy less than 10% of allocated space + * Partial Compaction: Extents occupy > 10% and < 50% of allocated space + * No Compaction: Extents occupy at least 50% of allocated space + */ +void +xfs_iext_irec_compact( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_extnum_t nextents; /* number of extents in file */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + + if (nextents == 0) { + xfs_iext_destroy(ifp); + } else if (nextents <= XFS_INLINE_EXTS) { + xfs_iext_indirect_to_direct(ifp); + xfs_iext_direct_to_inline(ifp, nextents); + } else if (nextents <= XFS_LINEAR_EXTS) { + xfs_iext_indirect_to_direct(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { + xfs_iext_irec_compact_full(ifp); + } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { + xfs_iext_irec_compact_pages(ifp); + } +} + +/* + * Combine extents from neighboring extent pages. + */ +void +xfs_iext_irec_compact_pages( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ + int erp_idx = 0; /* indirection array index */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + while (erp_idx < nlists - 1) { + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + erp_next = erp + 1; + if (erp_next->er_extcount <= + (XFS_LINEAR_EXTS - erp->er_extcount)) { + memmove(&erp->er_extbuf[erp->er_extcount], + erp_next->er_extbuf, erp_next->er_extcount * + sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += erp_next->er_extcount; + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + } else { + erp_idx++; + } + } +} + +/* + * Fully compact the extent records managed by the indirection array. + */ +void +xfs_iext_irec_compact_full( + xfs_ifork_t *ifp) /* inode fork pointer */ +{ + xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */ + xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ + int erp_idx = 0; /* extent irec index */ + int ext_avail; /* empty entries in ex list */ + int ext_diff; /* number of exts to add */ + int nlists; /* number of irec's (ex lists) */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + erp = ifp->if_u1.if_ext_irec; + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + while (erp_idx < nlists - 1) { + ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; + ext_diff = MIN(ext_avail, erp_next->er_extcount); + memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += ext_diff; + erp_next->er_extcount -= ext_diff; + /* Remove next page */ + if (erp_next->er_extcount == 0) { + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + /* Update next page */ + } else { + /* Move rest of page up to become next new page */ + memmove(erp_next->er_extbuf, ep_next, + erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); + ep_next = erp_next->er_extbuf; + memset(&ep_next[erp_next->er_extcount], 0, + (XFS_LINEAR_EXTS - erp_next->er_extcount) * + sizeof(xfs_bmbt_rec_t)); + } + if (erp->er_extcount == XFS_LINEAR_EXTS) { + erp_idx++; + if (erp_idx < nlists) + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + else + break; + } + ep = &erp->er_extbuf[erp->er_extcount]; + erp_next = erp + 1; + ep_next = erp_next->er_extbuf; + } +} + +/* + * This is called to update the er_extoff field in the indirection + * array when extents have been added or removed from one of the + * extent lists. erp_idx contains the irec index to begin updating + * at and ext_diff contains the number of extents that were added + * or removed. + */ +void +xfs_iext_irec_update_extoffs( + xfs_ifork_t *ifp, /* inode fork pointer */ + int erp_idx, /* irec index to update */ + int ext_diff) /* number of new extents */ +{ + int i; /* loop counter */ + int nlists; /* number of irec's (ex lists */ + + ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + for (i = erp_idx; i < nlists; i++) { + ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; + } +}