X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fntfs%2Fmft.c;h=2ad5c8b104b934c9177e162d2c20c6da211ed0d9;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=dfa85ac2f8bad8d5f861f3da79025adf3730739b;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index dfa85ac2f..2ad5c8b10 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -45,10 +45,12 @@ */ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) { + loff_t i_size; ntfs_volume *vol = ni->vol; struct inode *mft_vi = vol->mft_ino; struct page *page; - unsigned long index, ofs, end_index; + unsigned long index, end_index; + unsigned ofs; BUG_ON(ni->page); /* @@ -57,16 +59,18 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) * overflowing the unsigned long, but I don't think we would ever get * here if the volume was that big... */ - index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; + index = (u64)ni->mft_no << vol->mft_record_size_bits >> + PAGE_CACHE_SHIFT; ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + i_size = i_size_read(mft_vi); /* The maximum valid index into the page cache for $MFT's data. */ - end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_CACHE_SHIFT; /* If the wanted index is out of bounds the mft record doesn't exist. */ if (unlikely(index >= end_index)) { - if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < - ofs + vol->mft_record_size) { + if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + + vol->mft_record_size) { page = ERR_PTR(-ENOENT); ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " "which is beyond the end of the mft. " @@ -89,6 +93,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) "Run chkdsk.", ni->mft_no); ntfs_unmap_page(page); page = ERR_PTR(-EIO); + NVolSetErrors(vol); } err_out: ni->page = NULL; @@ -100,8 +105,8 @@ err_out: * map_mft_record - map, pin and lock an mft record * @ni: ntfs inode whose MFT record to map * - * First, take the mrec_lock semaphore. We might now be sleeping, while waiting - * for the semaphore if it was already locked by someone else. + * First, take the mrec_lock mutex. We might now be sleeping, while waiting + * for the mutex if it was already locked by someone else. * * The page of the record is mapped using map_mft_record_page() before being * returned to the caller. @@ -131,9 +136,9 @@ err_out: * So that code will end up having to own the mrec_lock of all mft * records/inodes present in the page before I/O can proceed. In that case we * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be - * accessing anything without owning the mrec_lock semaphore. But we do need - * to use them because of the read_cache_page() invocation and the code becomes - * so much simpler this way that it is well worth it. + * accessing anything without owning the mrec_lock mutex. But we do need to + * use them because of the read_cache_page() invocation and the code becomes so + * much simpler this way that it is well worth it. * * The mft record is now ours and we return a pointer to it. You need to check * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return @@ -156,13 +161,13 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni) atomic_inc(&ni->count); /* Serialize access to this mft record. */ - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); m = map_mft_record_page(ni); if (likely(!IS_ERR(m))) return m; - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); return m; @@ -213,7 +218,7 @@ void unmap_mft_record(ntfs_inode *ni) ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); unmap_mft_record_page(ni); - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); /* * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to @@ -246,7 +251,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, int i; unsigned long mft_no = MREF(mref); u16 seq_no = MSEQNO(mref); - BOOL destroy_ni = FALSE; + bool destroy_ni = false; ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", mft_no, base_ni->mft_no); @@ -257,7 +262,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, * in which case just return it. If not found, add it to the base * inode before returning it. */ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); if (base_ni->nr_extents > 0) { extent_nis = base_ni->ext.extent_ntfs_inos; for (i = 0; i < base_ni->nr_extents; i++) { @@ -270,7 +275,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, } } if (likely(ni != NULL)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* We found the record; just have to map and return it. */ m = map_mft_record(ni); @@ -285,7 +290,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, } unmap_mft_record(ni); ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. " + "reference! Corrupt filesystem. " "Run chkdsk."); return ERR_PTR(-EIO); } @@ -297,7 +302,7 @@ map_err_out: /* Record wasn't there. Get a new ntfs inode and initialize it. */ ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); if (unlikely(!ni)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); return ERR_PTR(-ENOMEM); } @@ -308,7 +313,7 @@ map_err_out: /* Now map the record. */ m = map_mft_record(ni); if (IS_ERR(m)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_clear_extent_inode(ni); goto map_err_out; @@ -316,8 +321,8 @@ map_err_out: /* Verify the sequence number if it is present. */ if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. Run chkdsk."); - destroy_ni = TRUE; + "reference! Corrupt filesystem. Run chkdsk."); + destroy_ni = true; m = ERR_PTR(-EIO); goto unm_err_out; } @@ -326,11 +331,11 @@ map_err_out: ntfs_inode **tmp; int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); - tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS); + tmp = kmalloc(new_size, GFP_NOFS); if (unlikely(!tmp)) { ntfs_error(base_ni->vol->sb, "Failed to allocate " "internal buffer."); - destroy_ni = TRUE; + destroy_ni = true; m = ERR_PTR(-ENOMEM); goto unm_err_out; } @@ -343,14 +348,14 @@ map_err_out: base_ni->ext.extent_ntfs_inos = tmp; } base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_debug("Done 2."); *ntfs_ino = ni; return m; unm_err_out: unmap_mft_record(ni); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* * If the extent inode was not attached to the base inode we need to @@ -395,12 +400,12 @@ void __mark_mft_record_dirty(ntfs_inode *ni) BUG_ON(NInoAttr(ni)); mark_ntfs_record_dirty(ni->page, ni->page_ofs); /* Determine the base vfs inode and mark it dirty, too. */ - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (likely(ni->nr_extents >= 0)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC); } @@ -469,7 +474,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, runlist_element *rl; unsigned int block_start, block_end, m_start, m_end, page_ofs; int i_bhs, nr_bhs, err = 0; - unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits; + unsigned char blocksize_bits = vol->sb->s_blocksize_bits; ntfs_debug("Entering for inode 0x%lx.", mft_no); BUG_ON(!max_bhs); @@ -509,7 +514,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, } while (bh); tail->b_this_page = head; attach_page_buffers(page, head); - BUG_ON(!page_has_buffers(page)); } bh = head = page_buffers(page); BUG_ON(!bh); @@ -531,6 +535,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, LCN lcn; unsigned int vcn_ofs; + bh->b_bdev = vol->sb->s_bdev; /* Obtain the vcn and offset of the current block. */ vcn = ((VCN)mft_no << vol->mft_record_size_bits) + (block_start - m_start); @@ -646,10 +651,7 @@ err_out: * fs/ntfs/aops.c::mark_ntfs_record_dirty(). * * On success, clean the mft record and return 0. On error, leave the mft - * record dirty and return -errno. The caller should call make_bad_inode() on - * the base inode to ensure no more access happens to this inode. We do not do - * it here as the caller may want to finish writing other extent mft records - * first to minimize on-disk metadata inconsistencies. + * record dirty and return -errno. * * NOTE: We always perform synchronous i/o and ignore the @sync parameter. * However, if the mft record has a counterpart in the mft mirror and @sync is @@ -668,8 +670,8 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) { ntfs_volume *vol = ni->vol; struct page *page = ni->page; - unsigned char blocksize_bits = vol->mft_ino->i_blkbits; - unsigned int blocksize = 1 << blocksize_bits; + unsigned int blocksize = vol->sb->s_blocksize; + unsigned char blocksize_bits = vol->sb->s_blocksize_bits; int max_bhs = vol->mft_record_size / blocksize; struct buffer_head *bhs[max_bhs]; struct buffer_head *bh, *head; @@ -689,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) */ if (!NInoTestClearDirty(ni)) goto done; - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); rl = NULL; @@ -723,6 +724,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) LCN lcn; unsigned int vcn_ofs; + bh->b_bdev = vol->sb->s_bdev; /* Obtain the vcn and offset of the current block. */ vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + (block_start - m_start); @@ -855,7 +857,7 @@ err_out: * caller is responsible for unlocking the ntfs inode and unpinning the base * vfs inode. * - * Return TRUE if the mft record may be written out and FALSE if not. + * Return 'true' if the mft record may be written out and 'false' if not. * * The caller has locked the page and cleared the uptodate flag on it which * means that we can safely write out any dirty mft records that do not have @@ -866,7 +868,7 @@ err_out: * Here is a description of the tests we perform: * * If the inode is found in icache we know the mft record must be a base mft - * record. If it is dirty, we do not write it and return FALSE as the vfs + * record. If it is dirty, we do not write it and return 'false' as the vfs * inode write paths will result in the access times being updated which would * cause the base mft record to be redirtied and written out again. (We know * the access time update will modify the base mft record because Windows @@ -875,11 +877,11 @@ err_out: * * If the inode is in icache and not dirty, we attempt to lock the mft record * and if we find the lock was already taken, it is not safe to write the mft - * record and we return FALSE. + * record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the mft record, * which also allows us safe writeout of the mft record. We then set - * @locked_ni to the locked ntfs inode and return TRUE. + * @locked_ni to the locked ntfs inode and return 'true'. * * Note we cannot just lock the mft record and sleep while waiting for the lock * because this would deadlock due to lock reversal (normally the mft record is @@ -889,24 +891,24 @@ err_out: * If the inode is not in icache we need to perform further checks. * * If the mft record is not a FILE record or it is a base mft record, we can - * safely write it and return TRUE. + * safely write it and return 'true'. * * We now know the mft record is an extent mft record. We check if the inode * corresponding to its base mft record is in icache and obtain a reference to - * it if it is. If it is not, we can safely write it and return TRUE. + * it if it is. If it is not, we can safely write it and return 'true'. * * We now have the base inode for the extent mft record. We check if it has an * ntfs inode for the extent mft record attached and if not it is safe to write - * the extent mft record and we return TRUE. + * the extent mft record and we return 'true'. * * The ntfs inode for the extent mft record is attached to the base inode so we * attempt to lock the extent mft record and if we find the lock was already - * taken, it is not safe to write the extent mft record and we return FALSE. + * taken, it is not safe to write the extent mft record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the extent mft * record, which also allows us safe writeout of the extent mft record. We * set the ntfs inode of the extent mft record clean and then set @locked_ni to - * the now locked ntfs inode and return TRUE. + * the now locked ntfs inode and return 'true'. * * Note, the reason for actually writing dirty mft records here and not just * relying on the vfs inode dirty code paths is that we can have mft records @@ -920,7 +922,7 @@ err_out: * appear if the mft record is reused for a new inode before it got written * out. */ -BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, +bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, const MFT_RECORD *m, ntfs_inode **locked_ni) { struct super_block *sb = vol->sb; @@ -946,20 +948,23 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, na.name_len = 0; na.type = AT_UNUSED; /* - * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or - * we deadlock because the inode is already locked by the kernel - * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits - * until the inode is unlocked before returning it and it never gets - * unlocked because ntfs_should_write_mft_record() never returns. )-: - * Fortunately, we have inode 0 pinned in icache for the duration of - * the mount so we can access it directly. + * Optimize inode 0, i.e. $MFT itself, since we have it in memory and + * we get here for it rather often. */ if (!mft_no) { /* Balance the below iput(). */ vi = igrab(mft_vi); BUG_ON(vi != mft_vi); - } else - vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na); + } else { + /* + * Have to use ilookup5_nowait() since ilookup5() waits for the + * inode lock which causes ntfs to deadlock when a concurrent + * inode write via the inode dirty code paths and the page + * dirty code path of the inode dirty code path when writing + * $MFT occurs. + */ + vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na); + } if (vi) { ntfs_debug("Base inode 0x%lx is in icache.", mft_no); /* The inode is in icache. */ @@ -972,16 +977,16 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, mft_no); atomic_dec(&ni->count); iput(vi); - return FALSE; + return false; } ntfs_debug("Inode 0x%lx is not dirty.", mft_no); /* The inode is not dirty, try to take the mft record lock. */ - if (unlikely(down_trylock(&ni->mrec_lock))) { + if (unlikely(!mutex_trylock(&ni->mrec_lock))) { ntfs_debug("Mft record 0x%lx is already locked, do " "not write it.", mft_no); atomic_dec(&ni->count); iput(vi); - return FALSE; + return false; } ntfs_debug("Managed to lock mft record 0x%lx, write it.", mft_no); @@ -990,7 +995,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * return the locked ntfs inode. */ *locked_ni = ni; - return TRUE; + return true; } ntfs_debug("Inode 0x%lx is not in icache.", mft_no); /* The inode is not in icache. */ @@ -998,13 +1003,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, if (!ntfs_is_mft_record(m->magic)) { ntfs_debug("Mft record 0x%lx is not a FILE record, write it.", mft_no); - return TRUE; + return true; } /* Write the mft record if it is a base inode. */ if (!m->base_mft_record) { ntfs_debug("Mft record 0x%lx is a base record, write it.", mft_no); - return TRUE; + return true; } /* * This is an extent mft record. Check if the inode corresponding to @@ -1014,7 +1019,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, na.mft_no = MREF_LE(m->base_mft_record); ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " "inode 0x%lx in icache.", mft_no, na.mft_no); - vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na); + if (!na.mft_no) { + /* Balance the below iput(). */ + vi = igrab(mft_vi); + BUG_ON(vi != mft_vi); + } else + vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode, + &na); if (!vi) { /* * The base inode is not in icache, write this extent mft @@ -1022,7 +1033,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, */ ntfs_debug("Base inode 0x%lx is not in icache, write the " "extent record.", na.mft_no); - return TRUE; + return true; } ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no); /* @@ -1030,17 +1041,17 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * corresponding to this extent mft record attached. */ ni = NTFS_I(vi); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (ni->nr_extents <= 0) { /* * The base inode has no attached extent inodes, write this * extent mft record. */ - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Base inode 0x%lx has no attached extent inodes, " "write the extent record.", na.mft_no); - return TRUE; + return true; } /* Iterate over the attached extent inodes. */ extent_nis = ni->ext.extent_ntfs_inos; @@ -1059,28 +1070,28 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * extent mft record. */ if (!eni) { - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Extent inode 0x%lx is not attached to its base " "inode 0x%lx, write the extent record.", mft_no, na.mft_no); - return TRUE; + return true; } ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.", mft_no, na.mft_no); /* Take a reference to the extent ntfs inode. */ atomic_inc(&eni->count); - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); /* * Found the extent inode coresponding to this extent mft record. * Try to take the mft record lock. */ - if (unlikely(down_trylock(&eni->mrec_lock))) { + if (unlikely(!mutex_trylock(&eni->mrec_lock))) { atomic_dec(&eni->count); iput(vi); ntfs_debug("Extent mft record 0x%lx is already locked, do " "not write it.", mft_no); - return FALSE; + return false; } ntfs_debug("Managed to lock extent mft record 0x%lx, write it.", mft_no); @@ -1092,7 +1103,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * the locked extent ntfs inode. */ *locked_ni = eni; - return TRUE; + return true; } static const char *es = " Leaving inconsistent metadata. Unmount and run " @@ -1121,6 +1132,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, ntfs_inode *base_ni) { s64 pass_end, ll, data_pos, pass_start, ofs, bit; + unsigned long flags; struct address_space *mftbmp_mapping; u8 *buf, *byte; struct page *page; @@ -1134,9 +1146,13 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, * Set the end of the pass making sure we do not overflow the mft * bitmap. */ + read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags); pass_end = NTFS_I(vol->mft_ino)->allocated_size >> vol->mft_record_size_bits; + read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags); + read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; + read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); if (pass_end > ll) pass_end = ll; pass = 1; @@ -1263,6 +1279,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) { LCN lcn; s64 ll; + unsigned long flags; struct page *page; ntfs_inode *mft_ni, *mftbmp_ni; runlist_element *rl, *rl2 = NULL; @@ -1284,17 +1301,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) /* * Determine the last lcn of the mft bitmap. The allocated size of the * mft bitmap cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ - rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >> - vol->cluster_size_bits, TRUE); + down_write(&mftbmp_ni->runlist.lock); + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + ll = mftbmp_ni->allocated_size; + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, + (ll - 1) >> vol->cluster_size_bits, NULL); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft bitmap attribute."); - if (!IS_ERR(rl)) { - up_write(&mftbmp_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } @@ -1333,7 +1353,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) up_write(&vol->lcnbmp_lock); ntfs_unmap_page(page); /* Allocate a cluster from the DATA_ZONE. */ - rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE); + rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE, + true); if (IS_ERR(rl2)) { up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to allocate a cluster for " @@ -1396,7 +1417,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) BUG_ON(ll < rl2->vcn); BUG_ON(ll >= rl2->vcn + rl2->length); /* Get the size for the new mapping pairs array for this extent. */ - mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); + mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); if (unlikely(mp_size <= 0)) { ntfs_error(vol->sb, "Get size for mapping pairs failed for " "mft bitmap attribute extent."); @@ -1418,6 +1439,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) // TODO: Deal with this by moving this extent to a new mft // record or by starting a new extent in a new mft record or by // moving other attributes out of this mft record. + // Note: It will need to be a special mft record and if none of + // those are available it gets rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft bitmap attribute " "extent. Cannot handle this yet."); @@ -1428,7 +1451,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) /* Generate the mapping pairs array directly into the attr record. */ ret = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(a->data.non_resident.mapping_pairs_offset), - mp_size, rl2, ll, NULL); + mp_size, rl2, ll, -1, NULL); if (unlikely(ret)) { ntfs_error(vol->sb, "Failed to build mapping pairs array for " "mft bitmap attribute."); @@ -1458,9 +1481,11 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) } a = ctx->attr; } + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->allocated_size += vol->cluster_size; a->data.non_resident.allocated_size = cpu_to_sle64(mftbmp_ni->allocated_size); + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1476,7 +1501,9 @@ restore_undo_alloc: 0, ctx)) { ntfs_error(vol->sb, "Failed to find last attribute extent of " "mft bitmap attribute.%s", es); + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->allocated_size += vol->cluster_size; + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); up_write(&mftbmp_ni->runlist.lock); @@ -1512,7 +1539,7 @@ undo_alloc: a->data.non_resident.mapping_pairs_offset), old_alen - le16_to_cpu( a->data.non_resident.mapping_pairs_offset), - rl2, ll, NULL)) { + rl2, ll, -1, NULL)) { ntfs_error(vol->sb, "Failed to restore mapping pairs " "array.%s", es); NVolSetErrors(vol); @@ -1550,6 +1577,7 @@ undo_alloc: static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) { s64 old_data_size, old_initialized_size; + unsigned long flags; struct inode *mftbmp_vi; ntfs_inode *mft_ni, *mftbmp_ni; ntfs_attr_search_ctx *ctx; @@ -1583,7 +1611,8 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) goto put_err_out; } a = ctx->attr; - old_data_size = mftbmp_vi->i_size; + write_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_size = i_size_read(mftbmp_vi); old_initialized_size = mftbmp_ni->initialized_size; /* * We can simply update the initialized_size before filling the space @@ -1593,11 +1622,12 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) mftbmp_ni->initialized_size += 8; a->data.non_resident.initialized_size = cpu_to_sle64(mftbmp_ni->initialized_size); - if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) { - mftbmp_vi->i_size = mftbmp_ni->initialized_size; + if (mftbmp_ni->initialized_size > old_data_size) { + i_size_write(mftbmp_vi, mftbmp_ni->initialized_size); a->data.non_resident.data_size = - cpu_to_sle64(mftbmp_vi->i_size); + cpu_to_sle64(mftbmp_ni->initialized_size); } + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1636,22 +1666,28 @@ unm_err_out: goto err_out; } a = ctx->attr; + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->initialized_size = old_initialized_size; a->data.non_resident.initialized_size = cpu_to_sle64(old_initialized_size); - if (mftbmp_vi->i_size != old_data_size) { - mftbmp_vi->i_size = old_data_size; + if (i_size_read(mftbmp_vi) != old_data_size) { + i_size_write(mftbmp_vi, old_data_size); a->data.non_resident.data_size = cpu_to_sle64(old_data_size); } + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " "data_size 0x%llx, initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)mftbmp_vi->i_size, + (long long)i_size_read(mftbmp_vi), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ err_out: return ret; } @@ -1679,7 +1715,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) { LCN lcn; VCN old_last_vcn; - s64 min_nr, nr, ll = 0; + s64 min_nr, nr, ll; + unsigned long flags; ntfs_inode *mft_ni; runlist_element *rl, *rl2; ntfs_attr_search_ctx *ctx = NULL; @@ -1687,7 +1724,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ATTR_RECORD *a = NULL; int ret, mp_size; u32 old_alen = 0; - BOOL mp_rebuilt = FALSE; + bool mp_rebuilt = false; ntfs_debug("Extending mft data allocation."); mft_ni = NTFS_I(vol->mft_ino); @@ -1695,23 +1732,25 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) * Determine the preferred allocation location, i.e. the last lcn of * the mft data attribute. The allocated size of the mft data * attribute cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ - rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >> - vol->cluster_size_bits, TRUE); + down_write(&mft_ni->runlist.lock); + read_lock_irqsave(&mft_ni->size_lock, flags); + ll = mft_ni->allocated_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + rl = ntfs_attr_find_vcn_nolock(mft_ni, + (ll - 1) >> vol->cluster_size_bits, NULL); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mft_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft data attribute."); - if (!IS_ERR(rl)) { - up_write(&mft_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } lcn = rl->lcn + rl->length; - ntfs_debug("Last lcn of mft data attribute is 0x%llx.", - (long long)lcn); + ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn); /* Minimum allocation is one mft record worth of clusters. */ min_nr = vol->mft_record_size >> vol->cluster_size_bits; if (!min_nr) @@ -1721,12 +1760,13 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) if (!nr) nr = min_nr; /* Ensure we do not go above 2^32-1 mft records. */ - if (unlikely((mft_ni->allocated_size + - (nr << vol->cluster_size_bits)) >> + read_lock_irqsave(&mft_ni->size_lock, flags); + ll = mft_ni->allocated_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + if (unlikely((ll + (nr << vol->cluster_size_bits)) >> vol->mft_record_size_bits >= (1ll << 32))) { nr = min_nr; - if (unlikely((mft_ni->allocated_size + - (nr << vol->cluster_size_bits)) >> + if (unlikely((ll + (nr << vol->cluster_size_bits)) >> vol->mft_record_size_bits >= (1ll << 32))) { ntfs_warning(vol->sb, "Cannot allocate mft record " "because the maximum number of inodes " @@ -1739,7 +1779,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) nr > min_nr ? "default" : "minimal", (long long)nr); old_last_vcn = rl[1].vcn; do { - rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE); + rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE, + true); if (likely(!IS_ERR(rl2))) break; if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { @@ -1772,7 +1813,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) return PTR_ERR(rl); } mft_ni->runlist.rl = rl; - ntfs_debug("Allocated %lli clusters.", nr); + ntfs_debug("Allocated %lli clusters.", (long long)nr); /* Find the last run in the new runlist. */ for (; rl[1].length; rl++) ; @@ -1808,7 +1849,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) BUG_ON(ll < rl2->vcn); BUG_ON(ll >= rl2->vcn + rl2->length); /* Get the size for the new mapping pairs array for this extent. */ - mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); + mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); if (unlikely(mp_size <= 0)) { ntfs_error(vol->sb, "Get size for mapping pairs failed for " "mft data attribute extent."); @@ -1832,18 +1873,22 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) // moving other attributes out of this mft record. // Note: Use the special reserved mft records and ensure that // this extent is not required to find the mft record in - // question. + // question. If no free special records left we would need to + // move an existing record away, insert ours in its place, and + // then place the moved record into the newly allocated space + // and we would then need to update all references to this mft + // record appropriately. This is rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft data attribute " "extent. Cannot handle this yet."); ret = -EOPNOTSUPP; goto undo_alloc; } - mp_rebuilt = TRUE; + mp_rebuilt = true; /* Generate the mapping pairs array directly into the attr record. */ ret = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(a->data.non_resident.mapping_pairs_offset), - mp_size, rl2, ll, NULL); + mp_size, rl2, ll, -1, NULL); if (unlikely(ret)) { ntfs_error(vol->sb, "Failed to build mapping pairs array of " "mft data attribute."); @@ -1875,9 +1920,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) } a = ctx->attr; } + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->allocated_size += nr << vol->cluster_size_bits; a->data.non_resident.allocated_size = cpu_to_sle64(mft_ni->allocated_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1892,7 +1939,9 @@ restore_undo_alloc: CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { ntfs_error(vol->sb, "Failed to find last attribute extent of " "mft data attribute.%s", es); + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->allocated_size += nr << vol->cluster_size_bits; + write_unlock_irqrestore(&mft_ni->size_lock, flags); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); up_write(&mft_ni->runlist.lock); @@ -1903,25 +1952,26 @@ restore_undo_alloc: NVolSetErrors(vol); return ret; } - a = ctx->attr; - a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); + ctx->attr->data.non_resident.highest_vcn = + cpu_to_sle64(old_last_vcn - 1); undo_alloc: - if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { + if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) { ntfs_error(vol->sb, "Failed to free clusters from mft data " "attribute.%s", es); NVolSetErrors(vol); } + a = ctx->attr; if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { ntfs_error(vol->sb, "Failed to truncate mft data attribute " "runlist.%s", es); NVolSetErrors(vol); } - if (mp_rebuilt) { + if (mp_rebuilt && !IS_ERR(ctx->mrec)) { if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( a->data.non_resident.mapping_pairs_offset), old_alen - le16_to_cpu( a->data.non_resident.mapping_pairs_offset), - rl2, ll, NULL)) { + rl2, ll, -1, NULL)) { ntfs_error(vol->sb, "Failed to restore mapping pairs " "array.%s", es); NVolSetErrors(vol); @@ -1933,6 +1983,10 @@ undo_alloc: } flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); + } else if (IS_ERR(ctx->mrec)) { + ntfs_error(vol->sb, "Failed to restore attribute search " + "context.%s", es); + NVolSetErrors(vol); } if (ctx) ntfs_attr_put_search_ctx(ctx); @@ -1991,7 +2045,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, "reports this as corruption, please email " "linux-ntfs-dev@lists.sourceforge.net stating " "that you saw this message and that the " - "modified file system created was corrupt. " + "modified filesystem created was corrupt. " "Thank you."); } /* Set the update sequence number to 1. */ @@ -2036,6 +2090,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no, */ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) { + loff_t i_size; struct inode *mft_vi = vol->mft_ino; struct page *page; MFT_RECORD *m; @@ -2051,10 +2106,11 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* The maximum valid index into the page cache for $MFT's data. */ - end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; + i_size = i_size_read(mft_vi); + end_index = i_size >> PAGE_CACHE_SHIFT; if (unlikely(index >= end_index)) { if (unlikely(index > end_index || ofs + vol->mft_record_size >= - (mft_vi->i_size & ~PAGE_CACHE_MASK))) { + (i_size & ~PAGE_CACHE_MASK))) { ntfs_error(vol->sb, "Tried to format non-existing mft " "record 0x%llx.", (long long)mft_no); return -ENOENT; @@ -2188,6 +2244,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, ntfs_inode *base_ni, MFT_RECORD **mrec) { s64 ll, bit, old_data_initialized, old_data_size; + unsigned long flags; struct inode *vi; struct page *page; ntfs_inode *mft_ni, *mftbmp_ni, *ni; @@ -2198,7 +2255,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, unsigned int ofs; int err; le16 seq_no, usn; - BOOL record_formatted = FALSE; + bool record_formatted = false; if (base_ni) { ntfs_debug("Entering (allocating an extent mft record for " @@ -2237,9 +2294,13 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, * the first 24 mft records as they are special and whilst they may not * be in use, we do not allocate from them. */ + read_lock_irqsave(&mft_ni->size_lock, flags); ll = mft_ni->initialized_size >> vol->mft_record_size_bits; - if (mftbmp_ni->initialized_size << 3 > ll && - mftbmp_ni->initialized_size > 3) { + read_unlock_irqrestore(&mft_ni->size_lock, flags); + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_initialized = mftbmp_ni->initialized_size; + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + if (old_data_initialized << 3 > ll && old_data_initialized > 3) { bit = ll; if (bit < 24) bit = 24; @@ -2254,15 +2315,18 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, * mft record that we can allocate. * Note: The smallest mft record we allocate is mft record 24. */ - bit = mftbmp_ni->initialized_size << 3; + bit = old_data_initialized << 3; if (unlikely(bit >= (1ll << 32))) goto max_err_out; + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_size = mftbmp_ni->allocated_size; ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " "data_size 0x%llx, initialized_size 0x%llx.", - (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, - (long long)mftbmp_ni->initialized_size); - if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) { + (long long)old_data_size, + (long long)i_size_read(vol->mftbmp_ino), + (long long)old_data_initialized); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + if (old_data_initialized + 8 > old_data_size) { /* Need to extend bitmap by one more cluster. */ ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); err = ntfs_mft_bitmap_extend_allocation_nolock(vol); @@ -2270,12 +2334,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, up_write(&vol->mftbmp_lock); goto err_out; } +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Status of mftbmp after allocation extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, + (long long)i_size_read(vol->mftbmp_ino), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ } /* * We now have sufficient allocated space, extend the initialized_size @@ -2287,12 +2355,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, up_write(&vol->mftbmp_lock); goto err_out; } +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Status of mftbmp after initialized extention: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, + (long long)i_size_read(vol->mftbmp_ino), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); found_free_rec: /* @bit is the found free mft record, allocate it in the mft bitmap. */ @@ -2314,7 +2386,10 @@ have_alloc_rec: * parallel allocation could allocate the same mft record as this one. */ ll = (bit + 1) << vol->mft_record_size_bits; - if (ll <= mft_ni->initialized_size) { + read_lock_irqsave(&mft_ni->size_lock, flags); + old_data_initialized = mft_ni->initialized_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + if (ll <= old_data_initialized) { ntfs_debug("Allocated mft record already initialized."); goto mft_rec_already_initialized; } @@ -2325,26 +2400,30 @@ have_alloc_rec: * actually traversed more than once when a freshly formatted volume is * first written to so it optimizes away nicely in the common case. */ + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data before extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); while (ll > mft_ni->allocated_size) { + read_unlock_irqrestore(&mft_ni->size_lock, flags); err = ntfs_mft_data_extend_allocation_nolock(vol); if (unlikely(err)) { ntfs_error(vol->sb, "Failed to extend mft data " "allocation."); goto undo_mftbmp_alloc_nolock; } + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data after allocation extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); } + read_unlock_irqrestore(&mft_ni->size_lock, flags); /* * Extend mft data initialized size (and data size of course) to reach * the allocated mft record, formatting the mft records allong the way. @@ -2352,6 +2431,7 @@ have_alloc_rec: * needed by ntfs_mft_record_format(). We will update the attribute * record itself in one fell swoop later on. */ + write_lock_irqsave(&mft_ni->size_lock, flags); old_data_initialized = mft_ni->initialized_size; old_data_size = vol->mft_ino->i_size; while (ll > mft_ni->initialized_size) { @@ -2360,8 +2440,9 @@ have_alloc_rec: new_initialized_size = mft_ni->initialized_size + vol->mft_record_size; mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; - if (new_initialized_size > vol->mft_ino->i_size) - vol->mft_ino->i_size = new_initialized_size; + if (new_initialized_size > i_size_read(vol->mft_ino)) + i_size_write(vol->mft_ino, new_initialized_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); ntfs_debug("Initializing mft record 0x%llx.", (long long)mft_no); err = ntfs_mft_record_format(vol, mft_no); @@ -2369,9 +2450,11 @@ have_alloc_rec: ntfs_error(vol->sb, "Failed to format mft record."); goto undo_data_init; } + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->initialized_size = new_initialized_size; } - record_formatted = TRUE; + write_unlock_irqrestore(&mft_ni->size_lock, flags); + record_formatted = true; /* Update the mft data attribute record to reflect the new sizes. */ m = map_mft_record(mft_ni); if (IS_ERR(m)) { @@ -2396,22 +2479,27 @@ have_alloc_rec: goto undo_data_init; } a = ctx->attr; + read_lock_irqsave(&mft_ni->size_lock, flags); a->data.non_resident.initialized_size = cpu_to_sle64(mft_ni->initialized_size); - a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size); + a->data.non_resident.data_size = + cpu_to_sle64(i_size_read(vol->mft_ino)); + read_unlock_irqrestore(&mft_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data after mft record initialization: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); - BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size); - BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size); + BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); + BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); + read_unlock_irqrestore(&mft_ni->size_lock, flags); mft_rec_already_initialized: /* * We can finally drop the mft bitmap lock as the mft data attribute @@ -2549,11 +2637,6 @@ mft_rec_already_initialized: goto undo_mftbmp_alloc; } vi->i_ino = bit; - /* - * This is the optimal IO size (for stat), not the fs block - * size. - */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a * file so that the file can be updated if necessary (compare @@ -2582,7 +2665,7 @@ mft_rec_already_initialized: ni->name_len = 4; ni->itype.index.block_size = 4096; - ni->itype.index.block_size_bits = generic_ffs(4096) - 1; + ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1; ni->itype.index.collation_rule = COLLATION_FILE_NAME; if (vol->cluster_size <= ni->itype.index.block_size) { ni->itype.index.vcn_size = vol->cluster_size; @@ -2621,7 +2704,7 @@ mft_rec_already_initialized: * have its page mapped and it is very easy to do. */ atomic_inc(&ni->count); - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); ni->page = page; ni->page_ofs = ofs; /* @@ -2652,8 +2735,10 @@ mft_rec_already_initialized: *mrec = m; return ni; undo_data_init: + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->initialized_size = old_data_initialized; - vol->mft_ino->i_size = old_data_size; + i_size_write(vol->mft_ino, old_data_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); goto undo_mftbmp_alloc_nolock; undo_mftbmp_alloc: down_write(&vol->mftbmp_lock); @@ -2706,22 +2791,22 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) BUG_ON(NInoAttr(ni)); BUG_ON(ni->nr_extents != -1); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); BUG_ON(base_ni->nr_extents <= 0); ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", mft_no, base_ni->mft_no); - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); /* Make sure we are holding the only reference to the extent inode. */ if (atomic_read(&ni->count) > 2) { ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " "not freeing.", base_ni->mft_no); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); return -EBUSY; } @@ -2739,7 +2824,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) break; } - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); if (unlikely(err)) { ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " @@ -2798,16 +2883,16 @@ rollback_error: return 0; rollback: /* Rollback what we did... */ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); extent_nis = base_ni->ext.extent_ntfs_inos; if (!(base_ni->nr_extents & 3)) { int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); - extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS); + extent_nis = kmalloc(new_size, GFP_NOFS); if (unlikely(!extent_nis)) { ntfs_error(vol->sb, "Failed to allocate internal " "buffer during rollback.%s", es); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); NVolSetErrors(vol); goto rollback_error; } @@ -2822,7 +2907,7 @@ rollback: m->flags |= MFT_RECORD_IN_USE; m->sequence_number = old_seq_no; extent_nis[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); mark_mft_record_dirty(ni); return err; }