X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fsplice.c;h=2fca6ebf4cc2d17ceb05ccb6cbe7f93ab3571a87;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=0b8316692f55067c67f5c108cba152082db0d357;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/splice.c b/fs/splice.c index 0b8316692..2fca6ebf4 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -12,7 +12,7 @@ * Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. * - * Copyright (C) 2005-2006 Jens Axboe + * Copyright (C) 2005-2006 Jens Axboe * Copyright (C) 2005-2006 Linus Torvalds * Copyright (C) 2006 Ingo Molnar * @@ -42,7 +42,7 @@ struct splice_pipe_desc { struct partial_page *partial; /* pages[] may not be contig */ int nr_pages; /* number of pages in map */ unsigned int flags; /* splice flags */ - struct pipe_buf_operations *ops;/* ops associated with output pipe */ + const struct pipe_buf_operations *ops;/* ops associated with output pipe */ }; /* @@ -74,7 +74,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, wait_on_page_writeback(page); if (PagePrivate(page)) - try_to_release_page(page, mapping_gfp_mask(mapping)); + try_to_release_page(page, GFP_KERNEL); /* * If we succeeded in removing the mapping, set LRU flag @@ -139,7 +139,7 @@ error: return err; } -static struct pipe_buf_operations page_cache_pipe_buf_ops = { +static const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -159,7 +159,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, return generic_pipe_buf_steal(pipe, buf); } -static struct pipe_buf_operations user_page_pipe_buf_ops = { +static const struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -333,7 +333,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping)); + GFP_KERNEL); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -557,7 +557,6 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; - gfp_t gfp_mask = mapping_gfp_mask(mapping); unsigned int offset, this_len; struct page *page; pgoff_t index; @@ -591,7 +590,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto find_page; page = buf->page; - if (add_to_page_cache(page, mapping, index, gfp_mask)) { + if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { unlock_page(page); goto find_page; } @@ -613,7 +612,7 @@ find_page: * This will also lock the page */ ret = add_to_page_cache_lru(page, mapping, index, - gfp_mask); + GFP_KERNEL); if (unlikely(ret)) goto out; } @@ -707,9 +706,9 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -722,13 +721,10 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); - for (;;) { if (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - struct pipe_buf_operations *ops = buf->ops; + const struct pipe_buf_operations *ops = buf->ops; sd.len = buf->len; if (sd.len > sd.total_len) @@ -797,9 +793,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { smp_mb(); if (waitqueue_active(&pipe->wait)) @@ -810,6 +803,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) +{ + ssize_t ret; + struct inode *inode = out->f_mapping->host; + + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); + ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); + inode_double_unlock(inode, pipe->inode); + + return ret; +} + +/** + * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * @pipe: pipe info + * @out: file to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. + * + */ +ssize_t +generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + int err; + + err = remove_suid(out->f_path.dentry); + if (unlikely(err)) + return err; + + ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + if (ret > 0) { + *ppos += ret; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + + if (err) + ret = err; + } + } + + return ret; +} + +EXPORT_SYMBOL(generic_file_splice_write_nolock); + /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info @@ -826,12 +886,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; ssize_t ret; + int err; + + err = should_remove_suid(out->f_path.dentry); + if (unlikely(err)) { + mutex_lock(&inode->i_mutex); + err = __remove_suid(out->f_path.dentry, err); + mutex_unlock(&inode->i_mutex); + if (err) + return err; + } ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { - struct inode *inode = mapping->host; - *ppos += ret; /* @@ -839,8 +908,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); @@ -941,7 +1008,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, * randomly drop data for eg socket -> socket splicing. Use the * piped splicing for that! */ - i_mode = in->f_dentry->d_inode->i_mode; + i_mode = in->f_path.dentry->d_inode->i_mode; if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) return -EINVAL; @@ -1046,12 +1113,12 @@ EXPORT_SYMBOL(do_splice_direct); * location, so checking ->i_pipe is not enough to verify that this is a * pipe. */ -static inline int is_pipe(struct inode *inode) +static inline struct pipe_inode_info *pipe_info(struct inode *inode) { - if (inode->i_pipe && S_ISFIFO(inode->i_mode)) - return 1; + if (S_ISFIFO(inode->i_mode)) + return inode->i_pipe; - return 0; + return NULL; } /* @@ -1065,8 +1132,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, loff_t offset, *off; long ret; - if (is_pipe(in->f_dentry->d_inode)) { - pipe = in->f_dentry->d_inode->i_pipe; + pipe = pipe_info(in->f_path.dentry->d_inode); + if (pipe) { if (off_in) return -ESPIPE; if (off_out) { @@ -1086,8 +1153,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; } - if (is_pipe(out->f_dentry->d_inode)) { - pipe = out->f_dentry->d_inode->i_pipe; + pipe = pipe_info(out->f_path.dentry->d_inode); + if (pipe) { if (off_out) return -ESPIPE; if (off_in) { @@ -1254,7 +1321,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, .ops = &user_page_pipe_buf_ops, }; - if (!is_pipe(file->f_dentry->d_inode)) + pipe = pipe_info(file->f_path.dentry->d_inode); + if (!pipe) return -EBADF; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; @@ -1266,7 +1334,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, if (spd.nr_pages <= 0) return spd.nr_pages; - pipe = file->f_dentry->d_inode->i_pipe; return splice_to_pipe(pipe, &spd); } @@ -1414,13 +1481,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - if (ipipe->inode < opipe->inode) { - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); - } + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1464,8 +1525,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); - mutex_unlock(&ipipe->inode->i_mutex); - mutex_unlock(&opipe->inode->i_mutex); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. @@ -1489,20 +1549,15 @@ static int link_pipe(struct pipe_inode_info *ipipe, static long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe; - struct pipe_inode_info *opipe; + struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); + struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); int ret = -EINVAL; - if (!is_pipe(in->f_dentry->d_inode) || !is_pipe(out->f_dentry->d_inode)) - return ret; - /* * Duplicate the contents of ipipe to opipe without actually * copying the data. */ - ipipe = in->f_dentry->d_inode->i_pipe; - opipe = out->f_dentry->d_inode->i_pipe; - if (ipipe != opipe) { + if (ipipe && opipe && ipipe != opipe) { /* * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter.