patch-2_6_7-vs1_9_1_12

[linux-2.6.git] / fs / ntfs / mft.c
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c

index ed793fb..8875af9 100644 (file)
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
  /**
   * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
   *
- * Copyright (c) 2001-2003 Anton Altaparmakov
+ * Copyright (c) 2001-2004 Anton Altaparmakov
   * Copyright (c) 2002 Richard Russon
   *
   * This program/include file is free software; you can redistribute it and/or
@@ -9,13 +9,13 @@
   * by the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
- * This program/include file is distributed in the hope that it will be 
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
- * along with this program (in the main directory of the Linux-NTFS 
+ * along with this program (in the main directory of the Linux-NTFS
   * distribution in the file COPYING); if not, write to the Free Software
   * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
@@ -102,6 +102,13 @@ int format_mft_record(ntfs_inode *ni, MFT_RECORD *mft_rec)
   */
  extern int ntfs_readpage(struct file *, struct page *);
  
+#ifdef NTFS_RW
+/**
+ * ntfs_mft_writepage - forward declaration, function is further below
+ */
+static int ntfs_mft_writepage(struct page *page, struct writeback_control *wbc);
+#endif /* NTFS_RW */
+
  /**
   * ntfs_mft_aops - address space operations for access to $MFT
   *
@@ -112,6 +119,10 @@ struct address_space_operations ntfs_mft_aops = {
         .readpage       = ntfs_readpage,        /* Fill page with data. */
         .sync_page      = block_sync_page,      /* Currently, just unplugs the
                                                    disk request queue. */
+#ifdef NTFS_RW
+       .writepage      = ntfs_mft_writepage,   /* Write out the dirty mft
+                                                  records in a page. */
+#endif /* NTFS_RW */
  };
  
  /**
@@ -430,3 +441,653 @@ unm_err_out:
         return m;
  }
  
+#ifdef NTFS_RW
+
+/**
+ * __mark_mft_record_dirty - set the mft record and the page containing it dirty
+ * @ni:                ntfs inode describing the mapped mft record
+ *
+ * Internal function.  Users should call mark_mft_record_dirty() instead.
+ *
+ * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
+ * as well as the page containing the mft record, dirty.  Also, mark the base
+ * vfs inode dirty.  This ensures that any changes to the mft record are
+ * written out to disk.
+ *
+ * NOTE:  We only set I_DIRTY_SYNC and I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
+ * on the base vfs inode, because even though file data may have been modified,
+ * it is dirty in the inode meta data rather than the data page cache of the
+ * inode, and thus there are no data pages that need writing out.  Therefore, a
+ * full mark_inode_dirty() is overkill.  A mark_inode_dirty_sync(), on the
+ * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
+ * ensure ->write_inode is called from generic_osync_inode() and this needs to
+ * happen or the file data would not necessarily hit the device synchronously,
+ * even though the vfs inode has the O_SYNC flag set.  Also, I_DIRTY_DATASYNC
+ * simply "feels" better than just I_DIRTY_SYNC, since the file data has not
+ * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
+ * would suggest.
+ */
+void __mark_mft_record_dirty(ntfs_inode *ni)
+{
+       struct page *page = ni->page;
+       ntfs_inode *base_ni;
+
+       ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+       BUG_ON(!page);
+       BUG_ON(NInoAttr(ni));
+
+       /*
+        * Set the page containing the mft record dirty.  This also marks the
+        * $MFT inode dirty (I_DIRTY_PAGES).
+        */
+       __set_page_dirty_nobuffers(page);
+
+       /* Find the base ntfs inode and mark its vfs inode dirty, too. */
+       down(&ni->extent_lock);
+       if (likely(ni->nr_extents >= 0))
+               base_ni = ni;
+       else
+               base_ni = ni->ext.base_ntfs_ino;
+       up(&ni->extent_lock);
+       __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+}
+
+static const char *ntfs_please_email = "Please email "
+               "linux-ntfs-dev@lists.sourceforge.net and say that you saw "
+               "this message.  Thank you.";
+
+/**
+ * sync_mft_mirror_umount - synchronize an mft record to the mft mirror
+ * @ni:                ntfs inode whose mft record to synchronize
+ * @m:         mapped, mst protected (extent) mft record to synchronize
+ *
+ * Write the mapped, mst protected (extent) mft record @m described by the
+ * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr) bypassing
+ * the page cache and the $MFTMirr inode itself.
+ *
+ * This function is only for use at umount time when the mft mirror inode has
+ * already been disposed of.  We BUG() if we are called while the mft mirror
+ * inode is still attached to the volume.
+ *
+ * On success return 0.  On error return -errno.
+ *
+ * NOTE:  This function is not implemented yet as I am not convinced it can
+ * actually be triggered considering the sequence of commits we do in super.c::
+ * ntfs_put_super().  But just in case we provide this place holder as the
+ * alternative would be either to BUG() or to get a NULL pointer dereference
+ * and Oops.
+ */
+static int sync_mft_mirror_umount(ntfs_inode *ni, MFT_RECORD *m)
+{
+       ntfs_volume *vol = ni->vol;
+
+       BUG_ON(vol->mftmirr_ino);
+       ntfs_error(vol->sb, "Umount time mft mirror syncing is not "
+                       "implemented yet.  %s", ntfs_please_email);
+       return -EOPNOTSUPP;
+}
+
+/**
+ * sync_mft_mirror - synchronize an mft record to the mft mirror
+ * @ni:                ntfs inode whose mft record to synchronize
+ * @m:         mapped, mst protected (extent) mft record to synchronize
+ * @sync:      if true, wait for i/o completion (currently ignored)
+ *
+ * Write the mapped, mst protected (extent) mft record @m described by the
+ * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr).
+ *
+ * On success return 0.  On error return -errno and set the volume errors flag
+ * in the ntfs_volume to which @ni belongs.
+ *
+ * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
+ *
+ * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
+ * schedule i/o via ->writepage or do it via kntfsd or whatever.
+ */
+static int sync_mft_mirror(ntfs_inode *ni, MFT_RECORD *m, int sync)
+{
+       ntfs_volume *vol = ni->vol;
+       struct page *page;
+       unsigned int blocksize = vol->sb->s_blocksize;
+       int max_bhs = vol->mft_record_size / blocksize;
+       struct buffer_head *bhs[max_bhs];
+       struct buffer_head *bh, *head;
+       u8 *kmirr;
+       unsigned int block_start, block_end, m_start, m_end;
+       int i_bhs, nr_bhs, err = 0;
+
+       ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+       BUG_ON(!max_bhs);
+       if (unlikely(!vol->mftmirr_ino)) {
+               /* This could happen during umount... */
+               err = sync_mft_mirror_umount(ni, m);
+               if (likely(!err))
+                       return err;
+               goto err_out;
+       }
+       /* Get the page containing the mirror copy of the mft record @m. */
+       page = ntfs_map_page(vol->mftmirr_ino->i_mapping, ni->mft_no >>
+                       (PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
+       if (unlikely(IS_ERR(page))) {
+               ntfs_error(vol->sb, "Failed to map mft mirror page.");
+               err = PTR_ERR(page);
+               goto err_out;
+       }
+       /*
+        * Exclusion against other writers.  This should never be a problem
+        * since the page in which the mft record @m resides is also locked and
+        * hence any other writers would be held up there but it is better to
+        * make sure no one is writing from elsewhere.
+        */
+       lock_page(page);
+       /* The address in the page of the mirror copy of the mft record @m. */
+       kmirr = page_address(page) + ((ni->mft_no << vol->mft_record_size_bits)
+                       & ~PAGE_CACHE_MASK);
+       /* Copy the mst protected mft record to the mirror. */
+       memcpy(kmirr, m, vol->mft_record_size);
+       /* Make sure we have mapped buffers. */
+       if (!page_has_buffers(page)) {
+no_buffers_err_out:
+               ntfs_error(vol->sb, "Writing mft mirror records without "
+                               "existing buffers is not implemented yet.  %s",
+                               ntfs_please_email);
+               err = -EOPNOTSUPP;
+               goto unlock_err_out;
+       }
+       bh = head = page_buffers(page);
+       if (!bh)
+               goto no_buffers_err_out;
+       nr_bhs = 0;
+       block_start = 0;
+       m_start = kmirr - (u8*)page_address(page);
+       m_end = m_start + vol->mft_record_size;
+       do {
+               block_end = block_start + blocksize;
+               /*
+                * If the buffer is outside the mft record, just skip it,
+                * clearing it if it is dirty to make sure it is not written
+                * out.  It should never be marked dirty but better be safe.
+                */
+               if ((block_end <= m_start) || (block_start >= m_end)) {
+                       if (buffer_dirty(bh)) {
+                               ntfs_warning(vol->sb, "Clearing dirty mft "
+                                               "record page buffer.  %s",
+                                               ntfs_please_email);
+                               clear_buffer_dirty(bh);
+                       }
+                       continue;
+               }
+               if (!buffer_mapped(bh)) {
+                       ntfs_error(vol->sb, "Writing mft mirror records "
+                                       "without existing mapped buffers is "
+                                       "not implemented yet.  %s",
+                                       ntfs_please_email);
+                       err = -EOPNOTSUPP;
+                       continue;
+               }
+               if (!buffer_uptodate(bh)) {
+                       ntfs_error(vol->sb, "Writing mft mirror records "
+                                       "without existing uptodate buffers is "
+                                       "not implemented yet.  %s",
+                                       ntfs_please_email);
+                       err = -EOPNOTSUPP;
+                       continue;
+               }
+               BUG_ON(!nr_bhs && (m_start != block_start));
+               BUG_ON(nr_bhs >= max_bhs);
+               bhs[nr_bhs++] = bh;
+               BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
+       } while (block_start = block_end, (bh = bh->b_this_page) != head);
+       if (likely(!err)) {
+               /* Lock buffers and start synchronous write i/o on them. */
+               for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+                       struct buffer_head *tbh = bhs[i_bhs];
+
+                       if (unlikely(test_set_buffer_locked(tbh)))
+                               BUG();
+                       BUG_ON(!buffer_uptodate(tbh));
+                       if (buffer_dirty(tbh))
+                               clear_buffer_dirty(tbh);
+                       get_bh(tbh);
+                       tbh->b_end_io = end_buffer_write_sync;
+                       submit_bh(WRITE, tbh);
+               }
+               /* Wait on i/o completion of buffers. */
+               for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+                       struct buffer_head *tbh = bhs[i_bhs];
+
+                       wait_on_buffer(tbh);
+                       if (unlikely(!buffer_uptodate(tbh))) {
+                               err = -EIO;
+                               /*
+                                * Set the buffer uptodate so the page & buffer
+                                * states don't become out of sync.
+                                */
+                               if (PageUptodate(page))
+                                       set_buffer_uptodate(tbh);
+                       }
+               }
+       } else /* if (unlikely(err)) */ {
+               /* Clean the buffers. */
+               for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
+                       clear_buffer_dirty(bhs[i_bhs]);
+       }
+unlock_err_out:
+       /* Current state: all buffers are clean, unlocked, and uptodate. */
+       /* Remove the mst protection fixups again. */
+       post_write_mst_fixup((NTFS_RECORD*)kmirr);
+       flush_dcache_page(page);
+       unlock_page(page);
+       ntfs_unmap_page(page);
+       if (unlikely(err)) {
+               /* Writing the mirror record failed.  This is really bad! */
+               ntfs_error(vol->sb, "I/O error while writing mft mirror "
+                               "record 0x%lx!  You should unmount the volume "
+                               "and run chkdsk or ntfsfix.", ni->mft_no);
+               goto err_out;
+       }
+       ntfs_debug("Done.");
+       return 0;
+err_out:
+       ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error code %i).  "
+                       "Volume will be left marked dirty on umount.  Run "
+                       "ntfsfix on the partition after umounting to correct "
+                       "this.", -err);
+       /* We don't want to clear the dirty bit on umount. */
+       NVolSetErrors(vol);
+       return err;
+}
+
+/**
+ * write_mft_record_nolock - write out a mapped (extent) mft record
+ * @ni:                ntfs inode describing the mapped (extent) mft record
+ * @m:         mapped (extent) mft record to write
+ * @sync:      if true, wait for i/o completion
+ *
+ * Write the mapped (extent) mft record @m described by the (regular or extent)
+ * ntfs inode @ni to backing store.  If the mft record @m has a counterpart in
+ * the mft mirror, that is also updated.
+ *
+ * On success, clean the mft record and return 0.  On error, leave the mft
+ * record dirty and return -errno.  The caller should call make_bad_inode() on
+ * the base inode to ensure no more access happens to this inode.  We do not do
+ * it here as the caller may want to finish writing other extent mft records
+ * first to minimize on-disk metadata inconsistencies.
+ *
+ * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
+ * However, if the mft record has a counterpart in the mft mirror and @sync is
+ * true, we write the mft record, wait for i/o completion, and only then write
+ * the mft mirror copy.  This ensures that if the system crashes either the mft
+ * or the mft mirror will contain a self-consistent mft record @m.  If @sync is
+ * false on the other hand, we start i/o on both and then wait for completion
+ * on them.  This provides a speedup but no longer guarantees that you will end
+ * up with a self-consistent mft record in the case of a crash but if you asked
+ * for asynchronous writing you probably do not care about that anyway.
+ *
+ * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
+ * schedule i/o via ->writepage or do it via kntfsd or whatever.
+ */
+int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
+{
+       ntfs_volume *vol = ni->vol;
+       struct page *page = ni->page;
+       unsigned int blocksize = vol->sb->s_blocksize;
+       int max_bhs = vol->mft_record_size / blocksize;
+       struct buffer_head *bhs[max_bhs];
+       struct buffer_head *bh, *head;
+       unsigned int block_start, block_end, m_start, m_end;
+       int i_bhs, nr_bhs, err = 0;
+
+       ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+       BUG_ON(NInoAttr(ni));
+       BUG_ON(!max_bhs);
+       BUG_ON(!page);
+       BUG_ON(!PageLocked(page));
+       /*
+        * If the ntfs_inode is clean no need to do anything.  If it is dirty,
+        * mark it as clean now so that it can be redirtied later on if needed.
+        * There is no danger of races as long as the caller is holding the
+        * locks for the mft record @m and the page it is in.
+        */
+       if (!NInoTestClearDirty(ni))
+               goto done;
+       /* Make sure we have mapped buffers. */
+       if (!page_has_buffers(page)) {
+no_buffers_err_out:
+               ntfs_error(vol->sb, "Writing mft records without existing "
+                               "buffers is not implemented yet.  %s",
+                               ntfs_please_email);
+               err = -EOPNOTSUPP;
+               goto err_out;
+       }
+       bh = head = page_buffers(page);
+       if (!bh)
+               goto no_buffers_err_out;
+       nr_bhs = 0;
+       block_start = 0;
+       m_start = ni->page_ofs;
+       m_end = m_start + vol->mft_record_size;
+       do {
+               block_end = block_start + blocksize;
+               /*
+                * If the buffer is outside the mft record, just skip it,
+                * clearing it if it is dirty to make sure it is not written
+                * out.  It should never be marked dirty but better be safe.
+                */
+               if ((block_end <= m_start) || (block_start >= m_end)) {
+                       if (buffer_dirty(bh)) {
+                               ntfs_warning(vol->sb, "Clearing dirty mft "
+                                               "record page buffer.  %s",
+                                               ntfs_please_email);
+                               clear_buffer_dirty(bh);
+                       }
+                       continue;
+               }
+               if (!buffer_mapped(bh)) {
+                       ntfs_error(vol->sb, "Writing mft records without "
+                                       "existing mapped buffers is not "
+                                       "implemented yet.  %s",
+                                       ntfs_please_email);
+                       err = -EOPNOTSUPP;
+                       continue;
+               }
+               if (!buffer_uptodate(bh)) {
+                       ntfs_error(vol->sb, "Writing mft records without "
+                                       "existing uptodate buffers is not "
+                                       "implemented yet.  %s",
+                                       ntfs_please_email);
+                       err = -EOPNOTSUPP;
+                       continue;
+               }
+               BUG_ON(!nr_bhs && (m_start != block_start));
+               BUG_ON(nr_bhs >= max_bhs);
+               bhs[nr_bhs++] = bh;
+               BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
+       } while (block_start = block_end, (bh = bh->b_this_page) != head);
+       if (unlikely(err))
+               goto cleanup_out;
+       /* Apply the mst protection fixups. */
+       err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
+       if (err) {
+               ntfs_error(vol->sb, "Failed to apply mst fixups!");
+               goto cleanup_out;
+       }
+       flush_dcache_mft_record_page(ni);
+       /* Lock buffers and start synchronous write i/o on them. */
+       for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+               struct buffer_head *tbh = bhs[i_bhs];
+
+               if (unlikely(test_set_buffer_locked(tbh)))
+                       BUG();
+               BUG_ON(!buffer_uptodate(tbh));
+               if (buffer_dirty(tbh))
+                       clear_buffer_dirty(tbh);
+               get_bh(tbh);
+               tbh->b_end_io = end_buffer_write_sync;
+               submit_bh(WRITE, tbh);
+       }
+       /* Synchronize the mft mirror now if not @sync (result ignored). */
+       if (!sync && ni->mft_no < vol->mftmirr_size)
+               sync_mft_mirror(ni, m, sync);
+       /* Wait on i/o completion of buffers. */
+       for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+               struct buffer_head *tbh = bhs[i_bhs];
+
+               wait_on_buffer(tbh);
+               if (unlikely(!buffer_uptodate(tbh))) {
+                       err = -EIO;
+                       /*
+                        * Set the buffer uptodate so the page & buffer states
+                        * don't become out of sync.
+                        */
+                       if (PageUptodate(page))
+                               set_buffer_uptodate(tbh);
+               }
+       }
+       /* If @sync, now synchronize the mft mirror (result ignored). */
+       if (sync && ni->mft_no < vol->mftmirr_size)
+               sync_mft_mirror(ni, m, sync);
+       /* Remove the mst protection fixups again. */
+       post_write_mst_fixup((NTFS_RECORD*)m);
+       flush_dcache_mft_record_page(ni);
+       if (unlikely(err)) {
+               /* I/O error during writing.  This is really bad! */
+               ntfs_error(vol->sb, "I/O error while writing mft record "
+                               "0x%lx!  Marking base inode as bad.  You "
+                               "should unmount the volume and run chkdsk.",
+                               ni->mft_no);
+               goto err_out;
+       }
+done:
+       ntfs_debug("Done.");
+       return 0;
+cleanup_out:
+       /* Clean the buffers. */
+       for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
+               clear_buffer_dirty(bhs[i_bhs]);
+err_out:
+       /*
+        * Current state: all buffers are clean, unlocked, and uptodate.
+        * The caller should mark the base inode as bad so that no more i/o
+        * happens.  ->clear_inode() will still be invoked so all extent inodes
+        * and other allocated memory will be freed.
+        */
+       if (err == -ENOMEM) {
+               ntfs_error(vol->sb, "Not enough memory to write mft record.  "
+                               "Redirtying so the write is retried later.");
+               mark_mft_record_dirty(ni);
+               err = 0;
+       }
+       return err;
+}
+
+/**
+ * ntfs_mft_writepage - check if a metadata page contains dirty mft records
+ * @page:      metadata page possibly containing dirty mft records
+ * @wbc:       writeback control structure
+ *
+ * This is called from the VM when it wants to have a dirty $MFT/$DATA metadata
+ * page cache page cleaned.  The VM has already locked the page and marked it
+ * clean.  Instead of writing the page as a conventional ->writepage function
+ * would do, we check if the page still contains any dirty mft records (it must
+ * have done at some point in the past since the page was marked dirty) and if
+ * none are found, i.e. all mft records are clean, we unlock the page and
+ * return.  The VM is then free to do with the page as it pleases.  If on the
+ * other hand we do find any dirty mft records in the page, we redirty the page
+ * before unlocking it and returning so the VM knows that the page is still
+ * busy and cannot be thrown out.
+ *
+ * Note, we do not actually write any dirty mft records here because they are
+ * dirty inodes and hence will be written by the VFS inode dirty code paths.
+ * There is no need to write them from the VM page dirty code paths, too and in
+ * fact once we implement journalling it would be a complete nightmare having
+ * two code paths leading to mft record writeout.
+ */
+static int ntfs_mft_writepage(struct page *page, struct writeback_control *wbc)
+{
+       struct inode *mft_vi = page->mapping->host;
+       struct super_block *sb = mft_vi->i_sb;
+       ntfs_volume *vol = NTFS_SB(sb);
+       u8 *maddr;
+       MFT_RECORD *m;
+       ntfs_inode **extent_nis;
+       unsigned long mft_no;
+       int nr, i, j;
+       BOOL is_dirty = FALSE;
+
+       BUG_ON(mft_vi != vol->mft_ino);
+       /* The first mft record number in the page. */
+       mft_no = page->index << (PAGE_CACHE_SHIFT - vol->mft_record_size_bits);
+       /* Number of mft records in the page. */
+       nr = PAGE_CACHE_SIZE >> vol->mft_record_size_bits;
+       BUG_ON(!nr);
+       ntfs_debug("Entering for %i inodes starting at 0x%lx.", nr, mft_no);
+       /* Iterate over the mft records in the page looking for a dirty one. */
+       maddr = (u8*)kmap(page);
+       for (i = 0; i < nr; ++i, ++mft_no, maddr += vol->mft_record_size) {
+               struct inode *vi;
+               ntfs_inode *ni, *eni;
+               ntfs_attr na;
+
+               na.mft_no = mft_no;
+               na.name = NULL;
+               na.name_len = 0;
+               na.type = AT_UNUSED;
+               /*
+                * Check if the inode corresponding to this mft record is in
+                * the VFS inode cache and obtain a reference to it if it is.
+                */
+               ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);
+               /*
+                * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from
+                * here or we deadlock because the inode is already locked by
+                * the kernel (fs/fs-writeback.c::__sync_single_inode()) and
+                * ilookup5() waits until the inode is unlocked before
+                * returning it and it never gets unlocked because
+                * ntfs_mft_writepage() never returns.  )-:  Fortunately, we
+                * have inode 0 pinned in icache for the duration of the mount
+                * so we can access it directly.
+                */
+               if (!mft_no) {
+                       /* Balance the below iput(). */
+                       vi = igrab(mft_vi);
+                       BUG_ON(vi != mft_vi);
+               } else
+                       vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na);
+               if (vi) {
+                       ntfs_debug("Inode 0x%lx is in icache.", mft_no);
+                       /* The inode is in icache.  Check if it is dirty. */
+                       ni = NTFS_I(vi);
+                       if (!NInoDirty(ni)) {
+                               /* The inode is not dirty, skip this record. */
+                               ntfs_debug("Inode 0x%lx is not dirty, "
+                                               "continuing search.", mft_no);
+                               iput(vi);
+                               continue;
+                       }
+                       ntfs_debug("Inode 0x%lx is dirty, aborting search.",
+                                       mft_no);
+                       /* The inode is dirty, no need to search further. */
+                       iput(vi);
+                       is_dirty = TRUE;
+                       break;
+               }
+               ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
+               /* The inode is not in icache. */
+               /* Skip the record if it is not an mft record (type "FILE"). */
+               if (!ntfs_is_mft_recordp(maddr)) {
+                       ntfs_debug("Mft record 0x%lx is not a FILE record, "
+                                       "continuing search.", mft_no);
+                       continue;
+               }
+               m = (MFT_RECORD*)maddr;
+               /*
+                * Skip the mft record if it is not in use.  FIXME:  What about
+                * deleted/deallocated (extent) inodes?  (AIA)
+                */
+               if (!(m->flags & MFT_RECORD_IN_USE)) {
+                       ntfs_debug("Mft record 0x%lx is not in use, "
+                                       "continuing search.", mft_no);
+                       continue;
+               }
+               /* Skip the mft record if it is a base mft record. */
+               if (!m->base_mft_record) {
+                       ntfs_debug("Mft record 0x%lx is a base record, "
+                                       "continuing search.", mft_no);
+                       continue;
+               }
+               /*
+                * This is an extent mft record.  Check if the inode
+                * corresponding to its base mft record is in icache.
+                */
+               na.mft_no = MREF_LE(m->base_mft_record);
+               ntfs_debug("Mft record 0x%lx is an extent record.  Looking "
+                               "for base inode 0x%lx in icache.", mft_no,
+                               na.mft_no);
+               vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode,
+                               &na);
+               if (!vi) {
+                       /*
+                        * The base inode is not in icache.  Skip this extent
+                        * mft record.
+                        */
+                       ntfs_debug("Base inode 0x%lx is not in icache, "
+                                       "continuing search.", na.mft_no);
+                       continue;
+               }
+               ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
+               /*
+                * The base inode is in icache.  Check if it has the extent
+                * inode corresponding to this extent mft record attached.
+                */
+               ni = NTFS_I(vi);
+               down(&ni->extent_lock);
+               if (ni->nr_extents <= 0) {
+                       /*
+                        * The base inode has no attached extent inodes.  Skip
+                        * this extent mft record.
+                        */
+                       up(&ni->extent_lock);
+                       iput(vi);
+                       continue;
+               }
+               /* Iterate over the attached extent inodes. */
+               extent_nis = ni->ext.extent_ntfs_inos;
+               for (eni = NULL, j = 0; j < ni->nr_extents; ++j) {
+                       if (mft_no == extent_nis[j]->mft_no) {
+                               /*
+                                * Found the extent inode corresponding to this
+                                * extent mft record.
+                                */
+                               eni = extent_nis[j];
+                               break;
+                       }
+               }
+               /*
+                * If the extent inode was not attached to the base inode, skip
+                * this extent mft record.
+                */
+               if (!eni) {
+                       up(&ni->extent_lock);
+                       iput(vi);
+                       continue;
+               }
+               /*
+                * Found the extent inode corresponding to this extent mft
+                * record.  If it is dirty, no need to search further.
+                */
+               if (NInoDirty(eni)) {
+                       up(&ni->extent_lock);
+                       iput(vi);
+                       is_dirty = TRUE;
+                       break;
+               }
+               /* The extent inode is not dirty, so do the next record. */
+               up(&ni->extent_lock);
+               iput(vi);
+       }
+       kunmap(page);
+       /* If a dirty mft record was found, redirty the page. */
+       if (is_dirty) {
+               ntfs_debug("Inode 0x%lx is dirty.  Redirtying the page "
+                               "starting at inode 0x%lx.", mft_no,
+                               page->index << (PAGE_CACHE_SHIFT -
+                               vol->mft_record_size_bits));
+               redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
+       } else {
+               /*
+                * Keep the VM happy.  This must be done otherwise the
+                * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
+                * the page is clean.
+                */
+               BUG_ON(PageWriteback(page));
+               set_page_writeback(page);
+               unlock_page(page);
+               end_page_writeback(page);
+       }
+       ntfs_debug("Done.");
+       return 0;
+}
+
+#endif /* NTFS_RW */