fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / fs / xfs / xfs_itable.c
index d2a21c4..1d3ce38 100644 (file)
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir.h"
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
-#include "xfs_dir_sf.h"
 #include "xfs_dir2_sf.h"
 #include "xfs_attr_sf.h"
 #include "xfs_dinode.h"
 #include "xfs_error.h"
 #include "xfs_btree.h"
 
-#ifndef HAVE_USERACC
-#define useracc(ubuffer, size, flags, foo) (0)
-#define unuseracc(ubuffer, size, flags)
-#endif
+int
+xfs_internal_inum(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino)
+{
+       return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
+               (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+                (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
+}
 
 STATIC int
 xfs_bulkstat_one_iget(
@@ -56,10 +59,11 @@ xfs_bulkstat_one_iget(
 {
        xfs_dinode_core_t *dic;         /* dinode core info pointer */
        xfs_inode_t     *ip;            /* incore inode pointer */
-       vnode_t         *vp;
+       bhv_vnode_t     *vp;
        int             error;
 
-       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
+       error = xfs_iget(mp, NULL, ino,
+                        XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
        if (error) {
                *stat = BULKSTAT_RV_NOTHING;
                return error;
@@ -85,7 +89,7 @@ xfs_bulkstat_one_iget(
        buf->bs_mode = dic->di_mode;
        buf->bs_uid = dic->di_uid;
        buf->bs_gid = dic->di_gid;
-       buf->bs_xid = dic->di_xid;
+       buf->bs_tag = dic->di_tag;
        buf->bs_size = dic->di_size;
        vn_atime_to_bstime(vp, &buf->bs_atime);
        buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
@@ -160,7 +164,7 @@ xfs_bulkstat_one_dinode(
        buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
        buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
        buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
-       buf->bs_xid = INT_GET(dic->di_xid, ARCH_CONVERT);
+       buf->bs_tag = INT_GET(dic->di_tag, ARCH_CONVERT);
        buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
        buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
        buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
@@ -221,17 +225,12 @@ xfs_bulkstat_one(
        xfs_dinode_t    *dip;           /* dinode inode pointer */
 
        dip = (xfs_dinode_t *)dibuff;
+       *stat = BULKSTAT_RV_NOTHING;
 
-       if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-           (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-            (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
-               *stat = BULKSTAT_RV_NOTHING;
+       if (!buffer || xfs_internal_inum(mp, ino))
                return XFS_ERROR(EINVAL);
-       }
-       if (ubsize < sizeof(*buf)) {
-               *stat = BULKSTAT_RV_NOTHING;
+       if (ubsize < sizeof(*buf))
                return XFS_ERROR(ENOMEM);
-       }
 
        buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
 
@@ -247,8 +246,7 @@ xfs_bulkstat_one(
        }
 
        if (copy_to_user(buffer, buf, sizeof(*buf)))  {
-               *stat = BULKSTAT_RV_NOTHING;
-               error =  EFAULT;
+               error = EFAULT;
                goto out_free;
        }
 
@@ -261,6 +259,46 @@ xfs_bulkstat_one(
        return error;
 }
 
+/*
+ * Test to see whether we can use the ondisk inode directly, based
+ * on the given bulkstat flags, filling in dipp accordingly.
+ * Returns zero if the inode is dodgey.
+ */
+STATIC int
+xfs_bulkstat_use_dinode(
+       xfs_mount_t     *mp,
+       int             flags,
+       xfs_buf_t       *bp,
+       int             clustidx,
+       xfs_dinode_t    **dipp)
+{
+       xfs_dinode_t    *dip;
+       unsigned int    aformat;
+
+       *dipp = NULL;
+       if (!bp || (flags & BULKSTAT_FG_IGET))
+               return 1;
+       dip = (xfs_dinode_t *)
+                       xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
+       if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC ||
+           !XFS_DINODE_GOOD_VERSION(
+                       INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
+               return 0;
+       if (flags & BULKSTAT_FG_QUICK) {
+               *dipp = dip;
+               return 1;
+       }
+       /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
+       aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT);
+       if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
+           (aformat == XFS_DINODE_FMT_LOCAL) ||
+           (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
+               *dipp = dip;
+               return 1;
+       }
+       return 1;
+}
+
 /*
  * Return stat information in bulk (by-inode) for the filesystem.
  */
@@ -274,7 +312,7 @@ xfs_bulkstat(
        size_t                  statstruct_size, /* sizeof struct filling */
        char                    __user *ubuffer, /* buffer with inode stats */
        int                     flags,  /* defined in xfs_itable.h */
-       int                     *done)  /* 1 if there're more stats to get */
+       int                     *done)  /* 1 if there are more stats to get */
 {
        xfs_agblock_t           agbno=0;/* allocation group block number */
        xfs_buf_t               *agbp;  /* agi header buffer */
@@ -293,10 +331,11 @@ xfs_bulkstat(
        xfs_agino_t             gino;   /* current btree rec's start inode */
        int                     i;      /* loop index */
        int                     icount; /* count of inodes good in irbuf */
+       size_t                  irbsize; /* size of irec buffer in bytes */
        xfs_ino_t               ino;    /* inode number (filesystem) */
-       xfs_inobt_rec_t         *irbp;  /* current irec buffer pointer */
-       xfs_inobt_rec_t         *irbuf; /* start of irec buffer */
-       xfs_inobt_rec_t         *irbufend; /* end of good irec buffer entries */
+       xfs_inobt_rec_incore_t  *irbp;  /* current irec buffer pointer */
+       xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
+       xfs_inobt_rec_incore_t  *irbufend; /* end of good irec buffer entries */
        xfs_ino_t               lastino=0; /* last inode number returned */
        int                     nbcluster; /* # of blocks in a cluster */
        int                     nicluster; /* # of inodes in a cluster */
@@ -337,22 +376,10 @@ xfs_bulkstat(
                (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
        nimask = ~(nicluster - 1);
        nbcluster = nicluster >> mp->m_sb.sb_inopblog;
-       /*
-        * Lock down the user's buffer. If a buffer was not sent, as in the case
-        * disk quota code calls here, we skip this.
-        */
-       if (ubuffer &&
-           (error = useracc(ubuffer, ubcount * statstruct_size,
-                       (B_READ|B_PHYS), NULL))) {
-               return error;
-       }
-       /*
-        * Allocate a page-sized buffer for inode btree records.
-        * We could try allocating something smaller, but for normal
-        * calls we'll always (potentially) need the whole page.
-        */
-       irbuf = kmem_alloc(NBPC, KM_SLEEP);
-       nirbuf = NBPC / sizeof(*irbuf);
+       irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4,
+                                  KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+       nirbuf = irbsize / sizeof(*irbuf);
+
        /*
         * Loop over the allocation groups, starting from the last
         * inode returned; 0 means start of the allocation group.
@@ -376,7 +403,7 @@ xfs_bulkstat(
                 * Allocate and initialize a btree cursor for ialloc btree.
                 */
                cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
-                       (xfs_inode_t *)0, 0);
+                                               (xfs_inode_t *)0, 0);
                irbp = irbuf;
                irbufend = irbuf + nirbuf;
                end_of_ag = 0;
@@ -413,9 +440,9 @@ xfs_bulkstat(
                                                gcnt++;
                                }
                                gfree |= XFS_INOBT_MASKN(0, chunkidx);
-                               INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
-                               INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
-                               INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+                               irbp->ir_startino = gino;
+                               irbp->ir_freecount = gcnt;
+                               irbp->ir_free = gfree;
                                irbp++;
                                agino = gino + XFS_INODES_PER_CHUNK;
                                icount = XFS_INODES_PER_CHUNK - gcnt;
@@ -469,11 +496,27 @@ xfs_bulkstat(
                        }
                        /*
                         * If this chunk has any allocated inodes, save it.
+                        * Also start read-ahead now for this chunk.
                         */
                        if (gcnt < XFS_INODES_PER_CHUNK) {
-                               INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
-                               INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
-                               INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+                               /*
+                                * Loop over all clusters in the next chunk.
+                                * Do a readahead if there are any allocated
+                                * inodes in that cluster.
+                                */
+                               for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
+                                    chunkidx = 0;
+                                    chunkidx < XFS_INODES_PER_CHUNK;
+                                    chunkidx += nicluster,
+                                    agbno += nbcluster) {
+                                       if (XFS_INOBT_MASKN(chunkidx,
+                                                           nicluster) & ~gfree)
+                                               xfs_btree_reada_bufs(mp, agno,
+                                                       agbno, nbcluster);
+                               }
+                               irbp->ir_startino = gino;
+                               irbp->ir_freecount = gcnt;
+                               irbp->ir_free = gfree;
                                irbp++;
                                icount += XFS_INODES_PER_CHUNK - gcnt;
                        }
@@ -496,34 +539,12 @@ xfs_bulkstat(
                irbufend = irbp;
                for (irbp = irbuf;
                     irbp < irbufend && ubleft >= statstruct_size; irbp++) {
-                       /*
-                        * Read-ahead the next chunk's worth of inodes.
-                        */
-                       if (&irbp[1] < irbufend) {
-                               /*
-                                * Loop over all clusters in the next chunk.
-                                * Do a readahead if there are any allocated
-                                * inodes in that cluster.
-                                */
-                               for (agbno = XFS_AGINO_TO_AGBNO(mp,
-                                                       INT_GET(irbp[1].ir_startino, ARCH_CONVERT)),
-                                    chunkidx = 0;
-                                    chunkidx < XFS_INODES_PER_CHUNK;
-                                    chunkidx += nicluster,
-                                    agbno += nbcluster) {
-                                       if (XFS_INOBT_MASKN(chunkidx,
-                                                           nicluster) &
-                                           ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT)))
-                                               xfs_btree_reada_bufs(mp, agno,
-                                                       agbno, nbcluster);
-                               }
-                       }
                        /*
                         * Now process this chunk of inodes.
                         */
-                       for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0;
+                       for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
                             ubleft > 0 &&
-                               INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK;
+                               irbp->ir_freecount < XFS_INODES_PER_CHUNK;
                             chunkidx++, clustidx++, agino++) {
                                ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
                                /*
@@ -543,11 +564,12 @@ xfs_bulkstat(
                                 */
                                if ((chunkidx & (nicluster - 1)) == 0) {
                                        agbno = XFS_AGINO_TO_AGBNO(mp,
-                                                       INT_GET(irbp->ir_startino, ARCH_CONVERT)) +
+                                                       irbp->ir_startino) +
                                                ((chunkidx & nimask) >>
                                                 mp->m_sb.sb_inopblog);
 
-                                       if (flags & BULKSTAT_FG_QUICK) {
+                                       if (flags & (BULKSTAT_FG_QUICK |
+                                                    BULKSTAT_FG_INLINE)) {
                                                ino = XFS_AGINO_TO_INO(mp, agno,
                                                                       agino);
                                                bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -561,10 +583,12 @@ xfs_bulkstat(
                                                                      KM_SLEEP);
                                                ip->i_ino = ino;
                                                ip->i_mount = mp;
+                                               spin_lock_init(&ip->i_flags_lock);
                                                if (bp)
                                                        xfs_buf_relse(bp);
                                                error = xfs_itobp(mp, NULL, ip,
-                                                                 &dip, &bp, bno);
+                                                               &dip, &bp, bno,
+                                                               XFS_IMAP_BULKSTAT);
                                                if (!error)
                                                        clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
                                                kmem_zone_free(xfs_inode_zone, ip);
@@ -572,6 +596,8 @@ xfs_bulkstat(
                                                                   mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
                                                                   XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
                                                        bp = NULL;
+                                                       ubleft = 0;
+                                                       rval = error;
                                                        break;
                                                }
                                        }
@@ -579,30 +605,34 @@ xfs_bulkstat(
                                /*
                                 * Skip if this inode is free.
                                 */
-                               if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT))
+                               if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
                                        continue;
                                /*
                                 * Count used inodes as free so we can tell
                                 * when the chunk is used up.
                                 */
-                               INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1);
+                               irbp->ir_freecount++;
                                ino = XFS_AGINO_TO_INO(mp, agno, agino);
                                bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
-                               if (flags & BULKSTAT_FG_QUICK) {
-                                       dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-                                             (clustidx << mp->m_sb.sb_inodelog));
-
-                                       if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT)
-                                                   != XFS_DINODE_MAGIC
-                                           || !XFS_DINODE_GOOD_VERSION(
-                                                   INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
-                                               continue;
+                               if (!xfs_bulkstat_use_dinode(mp, flags, bp,
+                                                            clustidx, &dip))
+                                       continue;
+                               /*
+                                * If we need to do an iget, cannot hold bp.
+                                * Drop it, until starting the next cluster.
+                                */
+                               if ((flags & BULKSTAT_FG_INLINE) && !dip) {
+                                       if (bp)
+                                               xfs_buf_relse(bp);
+                                       bp = NULL;
                                }
 
                                /*
                                 * Get the inode and fill in a single buffer.
                                 * BULKSTAT_FG_QUICK uses dip to fill it in.
                                 * BULKSTAT_FG_IGET uses igets.
+                                * BULKSTAT_FG_INLINE uses dip if we have an
+                                * inline attr fork, else igets.
                                 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
                                 * This is also used to count inodes/blks, etc
                                 * in xfs_qm_quotacheck.
@@ -612,8 +642,15 @@ xfs_bulkstat(
                                                ubleft, private_data,
                                                bno, &ubused, dip, &fmterror);
                                if (fmterror == BULKSTAT_RV_NOTHING) {
-                                       if (error == ENOMEM)
+                                        if (error == EFAULT) {
+                                                ubleft = 0;
+                                                rval = error;
+                                                break;
+                                        }
+                                       else if (error == ENOMEM)
                                                ubleft = 0;
+                                       else
+                                               lastino = ino;
                                        continue;
                                }
                                if (fmterror == BULKSTAT_RV_GIVEUP) {
@@ -648,9 +685,7 @@ xfs_bulkstat(
        /*
         * Done, we're either out of filesystem or space to put the data.
         */
-       kmem_free(irbuf, NBPC);
-       if (ubuffer)
-               unuseracc(ubuffer, ubcount * statstruct_size, (B_READ|B_PHYS));
+       kmem_free(irbuf, irbsize);
        *ubcountp = ubelem;
        if (agno >= mp->m_sb.sb_agcount) {
                /*
@@ -675,7 +710,7 @@ xfs_bulkstat_single(
        xfs_mount_t             *mp,    /* mount point for filesystem */
        xfs_ino_t               *lastinop, /* inode to return */
        char                    __user *buffer, /* buffer with inode stats */
-       int                     *done)  /* 1 if there're more stats to get */
+       int                     *done)  /* 1 if there are more stats to get */
 {
        int                     count;  /* count value for bulkstat call */
        int                     error;  /* return value */