diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index c45e9639c..903e60e59 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -74,6 +74,7 @@
 #include "xfs_iomap.h"
 
 #include <linux/capability.h>
+#include <linux/writeback.h>
 
 #if defined(XFS_RW_TRACE)
@@ -81,7 +82,7 @@ void
 xfs_rw_enter_trace(
 	int			tag,
 	xfs_iocore_t		*io,
-	const struct iovec	*iovp,
+	void			*data,
 	size_t			segs,
 	loff_t			offset,
 	int			ioflags)
@@ -95,7 +96,7 @@ xfs_rw_enter_trace(
 		(void *)ip,
 		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
-		(void *)(__psint_t)iovp,
+		(void *)data,
 		(void *)((unsigned long)segs),
 		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(offset & 0xffffffff)),
@@ -225,40 +226,11 @@ xfs_inval_cached_pages(
 	int		write,
 	int		relock)
 {
-	xfs_mount_t	*mp;
-
-	if (!VN_CACHED(vp)) {
-		return;
-	}
-
-	mp = io->io_mount;
-
-	/*
-	 * We need to get the I/O lock exclusively in order
-	 * to safely invalidate pages and mappings.
-	 */
-	if (relock) {
-		XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
-		XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
-	}
-
-	/* Writing beyond EOF creates a hole that must be zeroed */
-	if (write && (offset > XFS_SIZE(mp, io))) {
-		xfs_fsize_t	isize;
-
-		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
-		isize = XFS_SIZE(mp, io);
-		if (offset > isize) {
-			xfs_zero_eof(vp, io, offset, isize, offset);
-		}
-		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+	if (VN_CACHED(vp)) {
+		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
 	}
-
-	xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
-	VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
-	if (relock) {
-		XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
-	}
 }
 
 ssize_t			/* bytes read, or (-) error */
@@ -324,11 +296,6 @@ xfs_read(
 		return -EIO;
 	}
 
-	/* OK so we are holding the I/O lock for the duration
-	 * of the submission, then what happens if the I/O
-	 * does not really happen here, but is scheduled
-	 * later?
-	 */
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
@@ -345,8 +312,11 @@
 	}
 
 	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
-				iovp, segs, *offset, ioflags);
+				(void *)iovp, segs, *offset, ioflags);
 	ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(iocb);
+
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	if (ret > 0)
@@ -406,12 +376,17 @@
 		}
 	}
 	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-		   target, count, *offset, ioflags);
+		   (void *)(unsigned long)target, count, *offset, ioflags);
 	ret = generic_file_sendfile(filp, offset, count, actor, target);
+
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	XFS_STATS_ADD(xs_read_bytes, ret);
-	xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+	if (ret > 0)
+		XFS_STATS_ADD(xs_read_bytes, ret);
 
+	if (likely(!(ioflags & IO_INVIS)))
+		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+
 	return ret;
 }
 
@@ -637,32 +612,34 @@
 xfs_write(
 	bhv_desc_t		*bdp,
 	struct kiocb		*iocb,
 	const struct iovec	*iovp,
-	unsigned int		segs,
+	unsigned int		nsegs,
 	loff_t			*offset,
 	int			ioflags,
 	cred_t			*credp)
 {
 	struct file		*file = iocb->ki_filp;
-	size_t			size = 0;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	unsigned long		segs = nsegs;
 	xfs_inode_t		*xip;
 	xfs_mount_t		*mp;
-	ssize_t			ret;
-	int			error = 0;
+	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
-	xfs_fsize_t		n, limit;
 	xfs_iocore_t		*io;
 	vnode_t			*vp;
 	unsigned long		seg;
 	int			iolock;
 	int			eventsent = 0;
 	vrwlock_t		locktype;
+	size_t			ocount = 0, count;
+	loff_t			pos;
+	int			need_isem = 1, need_flush = 0;
 
 	XFS_STATS_INC(xs_write_calls);
 
 	vp = BHV_TO_VNODE(bdp);
 	xip = XFS_BHVTOI(bdp);
 
-	/* START copy & waste from filemap.c */
 	for (seg = 0; seg < segs; seg++) {
 		const struct iovec *iv = &iovp[seg];
 
@@ -670,73 +647,90 @@ xfs_write(
 		 * If any segment has a negative length, or the cumulative
 		 * length ever wraps negative then return -EINVAL.
 		 */
-		size += iv->iov_len;
-		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-			return XFS_ERROR(-EINVAL);
+		ocount += iv->iov_len;
+		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return -EFAULT;
+		segs = seg;
+		ocount -= iv->iov_len;	/* This segment is no good */
+		break;
 	}
-	/* END copy & waste from filemap.c */
 
-	if (size == 0)
+	count = ocount;
+	pos = *offset;
+
+	if (count == 0)
 		return 0;
 
 	io = &xip->i_iocore;
 	mp = io->io_mount;
 
-	if (XFS_FORCED_SHUTDOWN(mp)) {
+	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
-	}
 
 	if (ioflags & IO_ISDIRECT) {
 		xfs_buftarg_t	*target =
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
 
-		if ((*offset & target->pbr_smask) ||
-		    (size & target->pbr_smask)) {
+		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
 			return XFS_ERROR(-EINVAL);
-		}
-		iolock = XFS_IOLOCK_SHARED;
-		locktype = VRWLOCK_WRITE_DIRECT;
-	} else {
+
+		if (!VN_CACHED(vp) && pos < i_size_read(inode))
+			need_isem = 0;
+
+		if (VN_CACHED(vp))
+			need_flush = 1;
+	}
+
+relock:
+	if (need_isem) {
 		iolock = XFS_IOLOCK_EXCL;
 		locktype = VRWLOCK_WRITE;
+
+		down(&inode->i_sem);
+	} else {
+		iolock = XFS_IOLOCK_SHARED;
+		locktype = VRWLOCK_WRITE_DIRECT;
 	}
 
 	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
 
-	isize = xip->i_d.di_size;
-	limit = XFS_MAXIOFFSET(mp);
+	isize = i_size_read(inode);
 
 	if (file->f_flags & O_APPEND)
 		*offset = isize;
 
 start:
-	n = limit - *offset;
-	if (n <= 0) {
+	error = -generic_write_checks(file, &pos, &count,
+					S_ISBLK(inode->i_mode));
+	if (error) {
 		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-		return -EFBIG;
+		goto out_unlock_isem;
 	}
 
-	if (n < size)
-		size = n;
-
-	new_size = *offset + size;
-	if (new_size > isize) {
+	new_size = pos + count;
+	if (new_size > isize)
 		io->io_new_size = new_size;
-	}
 
 	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
 	    !(ioflags & IO_INVIS) && !eventsent)) {
-		loff_t		savedsize = *offset;
-		int		dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+		loff_t		savedsize = pos;
+		int		dmflags = FILP_DELAY_FLAG(file);
+
+		if (need_isem)
+			dmflags |= DM_FLAGS_ISEM;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
-				      *offset, size,
+				      pos, count,
 				      dmflags, &locktype);
 		if (error) {
 			xfs_iunlock(xip, iolock);
-			return -error;
+			goto out_unlock_isem;
 		}
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		eventsent = 1;
@@ -748,9 +742,8 @@ start:
 	 * event prevents another call to XFS_SEND_DATA, which is
 	 * what allows the size to change in the first place.
 	 */
-	if ((file->f_flags & O_APPEND) &&
-	    savedsize != xip->i_d.di_size) {
-		*offset = isize = xip->i_d.di_size;
+	if ((file->f_flags & O_APPEND) && savedsize != isize) {
+		pos = isize = xip->i_d.di_size;
 		goto start;
 	}
 }
@@ -761,8 +754,10 @@ start:
 	 *
 	 * We must update xfs' times since revalidate will overcopy xfs.
 	 */
-	if (size && !(ioflags & IO_INVIS))
+	if (!(ioflags & IO_INVIS)) {
 		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+		inode_update_time(inode, 1);
+	}
 
 	/*
 	 * If the offset is beyond the size of the file, we have a couple
@@ -773,12 +768,12 @@ start:
 	 * to zero it out up to the new size.
 	 */
-	if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
-		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
-			isize, *offset + size);
+	if (pos > isize) {
+		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
+					isize, pos + count);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-			return(-error);
+			goto out_unlock_isem;
 		}
 	}
 	xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -795,22 +790,67 @@ start:
 	     (S_ISGID | S_IXGRP))) &&
 	    !capable(CAP_FSETID)) {
 		error = xfs_write_clear_setuid(xip);
-		if (error) {
+		if (likely(!error))
+			error = -remove_suid(file->f_dentry);
+		if (unlikely(error)) {
 			xfs_iunlock(xip, iolock);
-			return -error;
+			goto out_unlock_isem;
 		}
 	}
 
 retry:
-	if (ioflags & IO_ISDIRECT) {
-		xfs_inval_cached_pages(vp, io, *offset, 1, 1);
-		xfs_rw_enter_trace(XFS_DIOWR_ENTER,
-				io, iovp, segs, *offset, ioflags);
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = mapping->backing_dev_info;
+
+	if ((ioflags & IO_ISDIRECT)) {
+		if (need_flush) {
+			xfs_inval_cached_trace(io, pos, -1,
+					ctooff(offtoct(pos)), -1);
+			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
+					-1, FI_REMAPF_LOCKED);
+		}
+
+		if (need_isem) {
+			/* demote the lock now the cached pages are gone */
+			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+			up(&inode->i_sem);
+
+			iolock = XFS_IOLOCK_SHARED;
+			locktype = VRWLOCK_WRITE_DIRECT;
+			need_isem = 0;
+		}
+
+		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
+				*offset, ioflags);
+		ret = generic_file_direct_write(iocb, iovp,
+				&segs, pos, offset, count, ocount);
+
+		/*
+		 * direct-io write to a hole: fall through to buffered I/O
+		 * for completing the rest of the request.
+		 */
+		if (ret >= 0 && ret != count) {
+			XFS_STATS_ADD(xs_write_bytes, ret);
+
+			pos += ret;
+			count -= ret;
+
+			need_isem = 1;
+			ioflags &= ~IO_ISDIRECT;
+			xfs_iunlock(xip, iolock);
+			goto relock;
+		}
 	} else {
-		xfs_rw_enter_trace(XFS_WRITE_ENTER,
-				io, iovp, segs, *offset, ioflags);
+		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
+				*offset, ioflags);
+		ret = generic_file_buffered_write(iocb, iovp, segs,
+				pos, offset, count, ret);
 	}
-	ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
+
+	current->backing_dev_info = NULL;
+
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(iocb);
 
 	if ((ret == -ENOSPC) &&
 	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
@@ -821,17 +861,15 @@ retry:
 			DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 			0, 0, 0); /* Delay flag intentionally unused */
 		if (error)
-			return -error;
+			goto out_unlock_isem;
 		xfs_rwlock(bdp, locktype);
-		*offset = xip->i_d.di_size;
+		pos = xip->i_d.di_size;
 		goto retry;
 	}
 
 	if (*offset > xip->i_d.di_size) {
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		if (*offset > xip->i_d.di_size) {
-			struct inode	*inode = LINVFS_GET_IP(vp);
-
 			xip->i_d.di_size = *offset;
 			i_size_write(inode, *offset);
 			xip->i_update_core = 1;
@@ -840,23 +878,22 @@ retry:
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 	}
 
-	if (ret <= 0) {
-		xfs_rwunlock(bdp, locktype);
-		return ret;
-	}
+	error = -ret;
+	if (ret <= 0)
+		goto out_unlock_internal;
 
 	XFS_STATS_ADD(xs_write_bytes, ret);
 
 	/* Handle various SYNC-type writes */
-	if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
-
+	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
 		/*
 		 * If we're treating this as O_DSYNC and we have not updated the
 		 * size, force the log.
 		 */
+		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
+		    !(xip->i_update_size)) {
+			xfs_inode_log_item_t	*iip = xip->i_itemp;
 
-		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC)
-		    && !(xip->i_update_size)) {
 			/*
 			 * If an allocation transaction occurred
 			 * without extending the size, then we have to force
@@ -876,14 +913,8 @@ retry:
 			 * all changes affecting the inode are permanent
 			 * when we return.
 			 */
-
-			xfs_inode_log_item_t	*iip;
-			xfs_lsn_t		lsn;
-
-			iip = xip->i_itemp;
 			if (iip && iip->ili_last_lsn) {
-				lsn = iip->ili_last_lsn;
-				xfs_log_force(mp, lsn,
+				xfs_log_force(mp, iip->ili_last_lsn,
 					XFS_LOG_FORCE | XFS_LOG_SYNC);
 			} else if (xfs_ipincount(xip) > 0) {
 				xfs_log_force(mp, (xfs_lsn_t)0,
@@ -924,12 +955,27 @@ retry:
 				xfs_trans_set_sync(tp);
 				error = xfs_trans_commit(tp, 0, NULL);
 				xfs_iunlock(xip, XFS_ILOCK_EXCL);
+				if (error)
+					goto out_unlock_internal;
 			}
 		}
-	} /* (ioflags & O_SYNC) */
+
+		xfs_rwunlock(bdp, locktype);
+		if (need_isem)
+			up(&inode->i_sem);
+
+		error = sync_page_range(inode, mapping, pos, ret);
+		if (!error)
+			error = ret;
+		return error;
+	}
 
+ out_unlock_internal:
 	xfs_rwunlock(bdp, locktype);
-	return(ret);
+ out_unlock_isem:
+	if (need_isem)
+		up(&inode->i_sem);
+	return -error;
 }
 
 /*
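
The control-flow change worth dwelling on in the new xfs_write() is the direct-to-buffered fallback: generic_file_direct_write() may complete only part of the request (a direct write over a hole, for instance), so the remainder is finished through generic_file_buffered_write() after jumping back to the relock: label and re-taking i_sem. A minimal userspace sketch of the same two-leg pattern, assuming a hypothetical demo.dat file and 4 KiB alignment (an illustration of the idea only, not the kernel code):

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 8192;	/* two 4 KiB blocks */
	size_t done = 0;
	ssize_t ret;
	char *buf;
	int fd;

	/* O_DIRECT wants block-aligned buffers, offsets and lengths */
	if (posix_memalign((void **)&buf, 4096, len) != 0)
		return 1;
	memset(buf, 'x', len);

	/* First leg: attempt the direct write */
	fd = open("demo.dat", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	if (fd >= 0) {
		ret = pwrite(fd, buf, len, 0);
		if (ret > 0)
			done = ret;	/* may be a short write */
		close(fd);
	}

	/* Second leg: complete the remainder through the page cache */
	if (done < len) {
		fd = open("demo.dat", O_WRONLY | O_CREAT, 0644);
		if (fd < 0) {
			free(buf);
			return 1;
		}
		ret = pwrite(fd, buf + done, len - done, done);
		close(fd);
		if (ret < 0) {
			free(buf);
			return 1;
		}
	}

	free(buf);
	return 0;
}

As in the kernel path, the direct leg runs under weaker locking and the buffered leg merely completes whatever the direct leg left behind; the kernel version additionally demotes the iolock and drops i_sem before issuing the direct write, which the sketch does not attempt to model.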