X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fsplice.c;h=0b8316692f55067c67f5c108cba152082db0d357;hb=bef9ea3086d47cf98cfd0ea389953a0af0b60114;hp=a285fd746dc0d34aa97762a6e138416acae292e7;hpb=16cf0ec7408f389279d413869e94c1a351392f97;p=linux-2.6.git diff --git a/fs/splice.c b/fs/splice.c index a285fd746..0b8316692 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -55,31 +55,43 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct page *page = buf->page; - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; lock_page(page); - WARN_ON(!PageUptodate(page)); + mapping = page_mapping(page); + if (mapping) { + WARN_ON(!PageUptodate(page)); - /* - * At least for ext2 with nobh option, we need to wait on writeback - * completing on this page, since we'll remove it from the pagecache. - * Otherwise truncate wont wait on the page, allowing the disk - * blocks to be reused by someone else before we actually wrote our - * data to them. fs corruption ensues. - */ - wait_on_page_writeback(page); + /* + * At least for ext2 with nobh option, we need to wait on + * writeback completing on this page, since we'll remove it + * from the pagecache. Otherwise truncate wont wait on the + * page, allowing the disk blocks to be reused by someone else + * before we actually wrote our data to them. fs corruption + * ensues. + */ + wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, mapping_gfp_mask(mapping)); + if (PagePrivate(page)) + try_to_release_page(page, mapping_gfp_mask(mapping)); - if (!remove_mapping(mapping, page)) { - unlock_page(page); - return 1; + /* + * If we succeeded in removing the mapping, set LRU flag + * and return good. + */ + if (remove_mapping(mapping, page)) { + buf->flags |= PIPE_BUF_FLAG_LRU; + return 0; + } } - buf->flags |= PIPE_BUF_FLAG_LRU; - return 0; + /* + * Raced with truncate or failed to remove page from current + * address space, unlock and return failure. + */ + unlock_page(page); + return 1; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, @@ -595,7 +607,7 @@ find_page: ret = -ENOMEM; page = page_cache_alloc_cold(mapping); if (unlikely(!page)) - goto out_nomem; + goto out_ret; /* * This will also lock the page @@ -654,7 +666,7 @@ find_page: if (sd->pos + this_len > isize) vmtruncate(mapping->host, isize); - goto out; + goto out_ret; } if (buf->page != page) { @@ -686,7 +698,7 @@ find_page: out: page_cache_release(page); unlock_page(page); -out_nomem: +out_ret: return ret; } @@ -1029,6 +1041,19 @@ out_release: EXPORT_SYMBOL(do_splice_direct); +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +static inline int is_pipe(struct inode *inode) +{ + if (inode->i_pipe && S_ISFIFO(inode->i_mode)) + return 1; + + return 0; +} + /* * Determine where to splice to/from. */ @@ -1040,8 +1065,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, loff_t offset, *off; long ret; - pipe = in->f_dentry->d_inode->i_pipe; - if (pipe) { + if (is_pipe(in->f_dentry->d_inode)) { + pipe = in->f_dentry->d_inode->i_pipe; if (off_in) return -ESPIPE; if (off_out) { @@ -1061,8 +1086,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; } - pipe = out->f_dentry->d_inode->i_pipe; - if (pipe) { + if (is_pipe(out->f_dentry->d_inode)) { + pipe = out->f_dentry->d_inode->i_pipe; if (off_out) return -ESPIPE; if (off_in) { @@ -1219,7 +1244,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, static long do_vmsplice(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { - struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe; + struct pipe_inode_info *pipe; struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; struct splice_pipe_desc spd = { @@ -1229,7 +1254,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, .ops = &user_page_pipe_buf_ops, }; - if (unlikely(!pipe)) + if (!is_pipe(file->f_dentry->d_inode)) return -EBADF; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; @@ -1241,6 +1266,7 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, if (spd.nr_pages <= 0) return spd.nr_pages; + pipe = file->f_dentry->d_inode->i_pipe; return splice_to_pipe(pipe, &spd); } @@ -1294,6 +1320,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, return error; } +/* + * Make sure there's data to read. Wait for input if we can, otherwise + * return an appropriate error. + */ +static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +{ + int ret; + + /* + * Check ->nrbufs without the inode lock first. This function + * is speculative anyways, so missing one is ok. + */ + if (pipe->nrbufs) + return 0; + + ret = 0; + mutex_lock(&pipe->inode->i_mutex); + + while (!pipe->nrbufs) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!pipe->writers) + break; + if (!pipe->waiting_writers) { + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + } + pipe_wait(pipe); + } + + mutex_unlock(&pipe->inode->i_mutex); + return ret; +} + +/* + * Make sure there's writeable room. Wait for room if we can, otherwise + * return an appropriate error. + */ +static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +{ + int ret; + + /* + * Check ->nrbufs without the inode lock first. This function + * is speculative anyways, so missing one is ok. + */ + if (pipe->nrbufs < PIPE_BUFFERS) + return 0; + + ret = 0; + mutex_lock(&pipe->inode->i_mutex); + + while (pipe->nrbufs >= PIPE_BUFFERS) { + if (!pipe->readers) { + send_sig(SIGPIPE, current, 0); + ret = -EPIPE; + break; + } + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + pipe->waiting_writers++; + pipe_wait(pipe); + pipe->waiting_writers--; + } + + mutex_unlock(&pipe->inode->i_mutex); + return ret; +} + /* * Link contents of ipipe to opipe. */ @@ -1302,9 +1407,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; - int ret, do_wakeup, i, ipipe_first; - - ret = do_wakeup = ipipe_first = 0; + int ret = 0, i = 0, nbuf; /* * Potential ABBA deadlock, work around it by ordering lock @@ -1312,126 +1415,62 @@ static int link_pipe(struct pipe_inode_info *ipipe, * could deadlock (one doing tee from A -> B, the other from B -> A). */ if (ipipe->inode < opipe->inode) { - ipipe_first = 1; - mutex_lock(&ipipe->inode->i_mutex); - mutex_lock(&opipe->inode->i_mutex); + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); } else { - mutex_lock(&opipe->inode->i_mutex); - mutex_lock(&ipipe->inode->i_mutex); + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); } - for (i = 0;; i++) { + do { if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } - if (ipipe->nrbufs - i) { - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); - /* - * If we have room, fill this buffer - */ - if (opipe->nrbufs < PIPE_BUFFERS) { - int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); - - /* - * Get a reference to this pipe buffer, - * so we can copy the contents over. - */ - ibuf->ops->get(ipipe, ibuf); - - obuf = opipe->bufs + nbuf; - *obuf = *ibuf; - - /* - * Don't inherit the gift flag, we need to - * prevent multiple steals of this page. - */ - obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - - if (obuf->len > len) - obuf->len = len; - - opipe->nrbufs++; - do_wakeup = 1; - ret += obuf->len; - len -= obuf->len; - - if (!len) - break; - if (opipe->nrbufs < PIPE_BUFFERS) - continue; - } - - /* - * We have input available, but no output room. - * If we already copied data, return that. If we - * need to drop the opipe lock, it must be ordered - * last to avoid deadlocks. - */ - if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { - if (!ret) - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&opipe->wait)) - wake_up_interruptible(&opipe->wait); - kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); - do_wakeup = 0; - } + /* + * If we have iterated all input buffers or ran out of + * output room, break. + */ + if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) + break; - opipe->waiting_writers++; - pipe_wait(opipe); - opipe->waiting_writers--; - continue; - } + ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); + nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); /* - * No input buffers, do the usual checks for available - * writers and blocking and wait if necessary + * Get a reference to this pipe buffer, + * so we can copy the contents over. */ - if (!ipipe->writers) - break; - if (!ipipe->waiting_writers) { - if (ret) - break; - } + ibuf->ops->get(ipipe, ibuf); + + obuf = opipe->bufs + nbuf; + *obuf = *ibuf; + /* - * pipe_wait() drops the ipipe mutex. To avoid deadlocks - * with another process, we can only safely do that if - * the ipipe lock is ordered last. + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. */ - if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { - if (!ret) - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - if (waitqueue_active(&ipipe->wait)) - wake_up_interruptible_sync(&ipipe->wait); - kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); + if (obuf->len > len) + obuf->len = len; - pipe_wait(ipipe); - } + opipe->nrbufs++; + ret += obuf->len; + len -= obuf->len; + i++; + } while (len); mutex_unlock(&ipipe->inode->i_mutex); mutex_unlock(&opipe->inode->i_mutex); - if (do_wakeup) { + /* + * If we put data in the output pipe, wakeup any potential readers. + */ + if (ret > 0) { smp_mb(); if (waitqueue_active(&opipe->wait)) wake_up_interruptible(&opipe->wait); @@ -1450,16 +1489,36 @@ static int link_pipe(struct pipe_inode_info *ipipe, static long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; - struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; + struct pipe_inode_info *ipipe; + struct pipe_inode_info *opipe; + int ret = -EINVAL; + + if (!is_pipe(in->f_dentry->d_inode) || !is_pipe(out->f_dentry->d_inode)) + return ret; /* - * Link ipipe to the two output pipes, consuming as we go along. + * Duplicate the contents of ipipe to opipe without actually + * copying the data. */ - if (ipipe && opipe) - return link_pipe(ipipe, opipe, len, flags); + ipipe = in->f_dentry->d_inode->i_pipe; + opipe = out->f_dentry->d_inode->i_pipe; + if (ipipe != opipe) { + /* + * Keep going, unless we encounter an error. The ipipe/opipe + * ordering doesn't really matter. + */ + ret = link_ipipe_prep(ipipe, flags); + if (!ret) { + ret = link_opipe_prep(opipe, flags); + if (!ret) { + ret = link_pipe(ipipe, opipe, len, flags); + if (!ret && (flags & SPLICE_F_NONBLOCK)) + ret = -EAGAIN; + } + } + } - return -EINVAL; + return ret; } asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)