X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fpipe.c;h=8aada8e426f44f12095b59810c5f270bc128f014;hb=7dc8dcbeec756d7489be41a49cc92c27333e135a;hp=9ae8d8322dd7a17b2afd1ead51bfb94b9e6fc4b4;hpb=bc77d24c47b89f1e0efed0b8e4be5f8aad102883;p=linux-2.6.git diff --git a/fs/pipe.c b/fs/pipe.c index 9ae8d8322..8aada8e42 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include @@ -37,14 +39,18 @@ void pipe_wait(struct inode * inode) { DEFINE_WAIT(wait); - prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE); - up(PIPE_SEM(*inode)); + /* + * Pipes are system-local resources, so sleeping on them + * is considered a noninteractive wait: + */ + prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); + mutex_unlock(PIPE_MUTEX(*inode)); schedule(); finish_wait(PIPE_WAIT(*inode), &wait); - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); } -static inline int +static int pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) { unsigned long copy; @@ -64,7 +70,7 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) return 0; } -static inline int +static int pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) { unsigned long copy; @@ -84,20 +90,45 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) return 0; } +static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + if (info->tmp_page) { + __free_page(page); + return; + } + info->tmp_page = page; +} + +static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + return kmap(buf->page); +} + +static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + kunmap(buf->page); +} + +static struct pipe_buf_operations anon_pipe_buf_ops = { + .can_merge = 1, + .map = anon_pipe_buf_map, + .unmap = anon_pipe_buf_unmap, + .release = anon_pipe_buf_release, +}; + static ssize_t pipe_readv(struct file *filp, const struct iovec *_iov, unsigned long nr_segs, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; + struct pipe_inode_info *info; int do_wakeup; ssize_t ret; struct iovec *iov = (struct iovec *)_iov; size_t total_len; - /* pread is not allowed on pipes. */ - if (unlikely(ppos != &filp->f_pos)) - return -ESPIPE; - total_len = iov_length(iov, nr_segs); /* Null read succeeds. */ if (unlikely(total_len == 0)) @@ -105,33 +136,44 @@ pipe_readv(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); + info = inode->i_pipe; for (;;) { - int size = PIPE_LEN(*inode); - if (size) { - char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode); - ssize_t chars = PIPE_MAX_RCHUNK(*inode); + int bufs = info->nrbufs; + if (bufs) { + int curbuf = info->curbuf; + struct pipe_buffer *buf = info->bufs + curbuf; + struct pipe_buf_operations *ops = buf->ops; + void *addr; + size_t chars = buf->len; + int error; if (chars > total_len) chars = total_len; - if (chars > size) - chars = size; - if (pipe_iov_copy_to_user(iov, pipebuf, chars)) { + addr = ops->map(filp, info, buf); + error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); + ops->unmap(info, buf); + if (unlikely(error)) { if (!ret) ret = -EFAULT; break; } ret += chars; - - PIPE_START(*inode) += chars; - PIPE_START(*inode) &= (PIPE_SIZE - 1); - PIPE_LEN(*inode) -= chars; + buf->offset += chars; + buf->len -= chars; + if (!buf->len) { + buf->ops = NULL; + ops->release(info, buf); + curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); + info->curbuf = curbuf; + info->nrbufs = --bufs; + do_wakeup = 1; + } total_len -= chars; - do_wakeup = 1; if (!total_len) break; /* common path: read succeeded */ } - if (PIPE_LEN(*inode)) /* test for cyclic buffers */ + if (bufs) /* More to do? */ continue; if (!PIPE_WRITERS(*inode)) break; @@ -158,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, } pipe_wait(inode); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); /* Signal writers asynchronously that there is more room. */ if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); @@ -181,15 +223,12 @@ pipe_writev(struct file *filp, const struct iovec *_iov, unsigned long nr_segs, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; + struct pipe_inode_info *info; ssize_t ret; - size_t min; int do_wakeup; struct iovec *iov = (struct iovec *)_iov; size_t total_len; - - /* pwrite is not allowed on pipes. */ - if (unlikely(ppos != &filp->f_pos)) - return -ESPIPE; + ssize_t chars; total_len = iov_length(iov, nr_segs); /* Null write succeeds. */ @@ -198,48 +237,92 @@ pipe_writev(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - min = total_len; - if (min > PIPE_BUF) - min = 1; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); + info = inode->i_pipe; + + if (!PIPE_READERS(*inode)) { + send_sig(SIGPIPE, current, 0); + ret = -EPIPE; + goto out; + } + + /* We try to merge small writes */ + chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ + if (info->nrbufs && chars != 0) { + int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); + struct pipe_buffer *buf = info->bufs + lastbuf; + struct pipe_buf_operations *ops = buf->ops; + int offset = buf->offset + buf->len; + if (ops->can_merge && offset + chars <= PAGE_SIZE) { + void *addr = ops->map(filp, info, buf); + int error = pipe_iov_copy_from_user(offset + addr, iov, chars); + ops->unmap(info, buf); + ret = error; + do_wakeup = 1; + if (error) + goto out; + buf->len += chars; + total_len -= chars; + ret = chars; + if (!total_len) + goto out; + } + } + for (;;) { - int free; + int bufs; if (!PIPE_READERS(*inode)) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } - free = PIPE_FREE(*inode); - if (free >= min) { - /* transfer data */ - ssize_t chars = PIPE_MAX_WCHUNK(*inode); - char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode); + bufs = info->nrbufs; + if (bufs < PIPE_BUFFERS) { + int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); + struct pipe_buffer *buf = info->bufs + newbuf; + struct page *page = info->tmp_page; + int error; + + if (!page) { + page = alloc_page(GFP_HIGHUSER); + if (unlikely(!page)) { + ret = ret ? : -ENOMEM; + break; + } + info->tmp_page = page; + } /* Always wakeup, even if the copy fails. Otherwise * we lock up (O_NONBLOCK-)readers that sleep due to * syscall merging. + * FIXME! Is this really true? */ do_wakeup = 1; + chars = PAGE_SIZE; if (chars > total_len) chars = total_len; - if (chars > free) - chars = free; - if (pipe_iov_copy_from_user(pipebuf, iov, chars)) { + error = pipe_iov_copy_from_user(kmap(page), iov, chars); + kunmap(page); + if (unlikely(error)) { if (!ret) ret = -EFAULT; break; } ret += chars; - PIPE_LEN(*inode) += chars; + /* Insert it into the buffer array */ + buf->page = page; + buf->ops = &anon_pipe_buf_ops; + buf->offset = 0; + buf->len = chars; + info->nrbufs = ++bufs; + info->tmp_page = NULL; + total_len -= chars; if (!total_len) break; } - if (PIPE_FREE(*inode) && ret) { - /* handle cyclic data buffers */ - min = 1; + if (bufs < PIPE_BUFFERS) continue; - } if (filp->f_flags & O_NONBLOCK) { if (!ret) ret = -EAGAIN; break; @@ -257,13 +340,14 @@ pipe_writev(struct file *filp, const struct iovec *_iov, pipe_wait(inode); PIPE_WAITING_WRITERS(*inode)--; } - up(PIPE_SEM(*inode)); +out: + mutex_unlock(PIPE_MUTEX(*inode)); if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); } if (ret > 0) - inode_update_time(inode, 1); /* mtime and ctime */ + file_update_time(filp); return ret; } @@ -291,9 +375,23 @@ static int pipe_ioctl(struct inode *pino, struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; + struct pipe_inode_info *info; + int count, buf, nrbufs; + switch (cmd) { case FIONREAD: - return put_user(PIPE_LEN(*pino), (int __user *)arg); + mutex_lock(PIPE_MUTEX(*inode)); + info = inode->i_pipe; + count = 0; + buf = info->curbuf; + nrbufs = info->nrbufs; + while (--nrbufs >= 0) { + count += info->bufs[buf].len; + buf = (buf+1) & (PIPE_BUFFERS-1); + } + mutex_unlock(PIPE_MUTEX(*inode)); + return put_user(count, (int __user *)arg); default: return -EINVAL; } @@ -305,41 +403,47 @@ pipe_poll(struct file *filp, poll_table *wait) { unsigned int mask; struct inode *inode = filp->f_dentry->d_inode; + struct pipe_inode_info *info = inode->i_pipe; + int nrbufs; poll_wait(filp, PIPE_WAIT(*inode), wait); /* Reading only -- no need for acquiring the semaphore. */ - mask = POLLIN | POLLRDNORM; - if (PIPE_EMPTY(*inode)) - mask = POLLOUT | POLLWRNORM; - if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) - mask |= POLLHUP; - if (!PIPE_READERS(*inode)) - mask |= POLLERR; + nrbufs = info->nrbufs; + mask = 0; + if (filp->f_mode & FMODE_READ) { + mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; + if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) + mask |= POLLHUP; + } + + if (filp->f_mode & FMODE_WRITE) { + mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; + /* + * Most Unices do not set POLLERR for FIFOs but on Linux they + * behave exactly like pipes for poll(). + */ + if (!PIPE_READERS(*inode)) + mask |= POLLERR; + } return mask; } -/* FIXME: most Unices do not set POLLERR for fifos */ -#define fifo_poll pipe_poll - static int pipe_release(struct inode *inode, int decr, int decw) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode) -= decr; PIPE_WRITERS(*inode) -= decw; if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { - struct pipe_inode_info *info = inode->i_pipe; - inode->i_pipe = NULL; - free_page((unsigned long) info->base); - kfree(info); + free_pipe_info(inode); } else { wake_up_interruptible(PIPE_WAIT(*inode)); kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -350,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -367,9 +471,9 @@ pipe_write_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -384,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); if (retval >= 0) retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -430,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp) { /* We could have perhaps used atomic_t, but this and friends below are the only places. So it doesn't seem worthwhile. */ - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -440,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp) static int pipe_write_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -450,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp) static int pipe_rdwr_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); if (filp->f_mode & FMODE_READ) PIPE_READERS(*inode)++; if (filp->f_mode & FMODE_WRITE) PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -469,7 +573,7 @@ struct file_operations read_fifo_fops = { .read = pipe_read, .readv = pipe_readv, .write = bad_pipe_w, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_read_open, .release = pipe_read_release, @@ -481,7 +585,7 @@ struct file_operations write_fifo_fops = { .read = bad_pipe_r, .write = pipe_write, .writev = pipe_writev, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, .release = pipe_write_release, @@ -494,14 +598,14 @@ struct file_operations rdwr_fifo_fops = { .readv = pipe_readv, .write = pipe_write, .writev = pipe_writev, - .poll = fifo_poll, + .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, .release = pipe_rdwr_release, .fasync = pipe_rdwr_fasync, }; -struct file_operations read_pipe_fops = { +static struct file_operations read_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, @@ -513,7 +617,7 @@ struct file_operations read_pipe_fops = { .fasync = pipe_read_fasync, }; -struct file_operations write_pipe_fops = { +static struct file_operations write_pipe_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, @@ -525,7 +629,7 @@ struct file_operations write_pipe_fops = { .fasync = pipe_write_fasync, }; -struct file_operations rdwr_pipe_fops = { +static struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, @@ -538,29 +642,37 @@ struct file_operations rdwr_pipe_fops = { .fasync = pipe_rdwr_fasync, }; -struct inode* pipe_new(struct inode* inode) +void free_pipe_info(struct inode *inode) { - unsigned long page; + int i; + struct pipe_inode_info *info = inode->i_pipe; - page = __get_free_page(GFP_USER); - if (!page) - return NULL; + inode->i_pipe = NULL; + for (i = 0; i < PIPE_BUFFERS; i++) { + struct pipe_buffer *buf = info->bufs + i; + if (buf->ops) + buf->ops->release(info, buf); + } + if (info->tmp_page) + __free_page(info->tmp_page); + kfree(info); +} + +struct inode* pipe_new(struct inode* inode) +{ + struct pipe_inode_info *info; - inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); - if (!inode->i_pipe) + info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); + if (!info) goto fail_page; + memset(info, 0, sizeof(*info)); + inode->i_pipe = info; init_waitqueue_head(PIPE_WAIT(*inode)); - PIPE_BASE(*inode) = (char*) page; - PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; - PIPE_WAITING_WRITERS(*inode) = 0; PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1; - *PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL; return inode; fail_page: - free_page(page); return NULL; } @@ -656,13 +768,13 @@ int do_pipe(int *fd) f1->f_pos = f2->f_pos = 0; f1->f_flags = O_RDONLY; f1->f_op = &read_pipe_fops; - f1->f_mode = 1; + f1->f_mode = FMODE_READ; f1->f_version = 0; /* write file */ f2->f_flags = O_WRONLY; f2->f_op = &write_pipe_fops; - f2->f_mode = 2; + f2->f_mode = FMODE_WRITE; f2->f_version = 0; fd_install(i, f1); @@ -676,9 +788,7 @@ close_f12_inode_i_j: close_f12_inode_i: put_unused_fd(i); close_f12_inode: - free_page((unsigned long) PIPE_BASE(*inode)); - kfree(inode->i_pipe); - inode->i_pipe = NULL; + free_pipe_info(inode); iput(inode); close_f12: put_filp(f2); @@ -688,8 +798,6 @@ no_files: return error; } -EXPORT_SYMBOL_GPL(do_pipe); - /* * pipefs should _never_ be mounted by userland - too much of security hassle, * no real gain from having the whole whorehouse mounted. So we don't need @@ -728,5 +836,5 @@ static void __exit exit_pipe_fs(void) mntput(pipe_mnt); } -module_init(init_pipe_fs) -module_exit(exit_pipe_fs) +fs_initcall(init_pipe_fs); +module_exit(exit_pipe_fs);