#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
+#include <linux/highmem.h>
+
#include <asm/uaccess.h>
#include <asm/ioctls.h>
{
DEFINE_WAIT(wait);
- prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
- up(PIPE_SEM(*inode));
+ /*
+ * Pipes are system-local resources, so sleeping on them
+ * is considered a noninteractive wait:
+ */
+ prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
+ mutex_unlock(PIPE_MUTEX(*inode));
schedule();
finish_wait(PIPE_WAIT(*inode), &wait);
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
}
-static inline int
+static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
{
unsigned long copy;
return 0;
}
-static inline int
+static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
{
unsigned long copy;
return 0;
}
+/*
+ * Release callback for anonymous pipe buffers.
+ *
+ * The pipe keeps at most one spare page in info->tmp_page.  If that
+ * slot is empty the buffer's page is parked there; if a spare page is
+ * already cached, the buffer's page is freed instead.
+ */
+static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *spent = buf->page;
+
+	if (!info->tmp_page)
+		info->tmp_page = spent;
+	else
+		__free_page(spent);
+}
+
+/*
+ * Map callback for anonymous pipe buffers: returns the kmap() address
+ * of the buffer's page.  The file and info arguments are not used here.
+ */
+static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	return kmap(page);
+}
+
+/*
+ * Unmap callback for anonymous pipe buffers: kunmap()s the buffer's
+ * page, pairing with anon_pipe_buf_map().  The info argument is not
+ * used here.
+ */
+static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	struct page *page = buf->page;
+
+	kunmap(page);
+}
+
+/*
+ * Operations table installed on buffers whose page was allocated by
+ * the pipe write path itself.
+ */
+static struct pipe_buf_operations anon_pipe_buf_ops = {
+	.map		= anon_pipe_buf_map,
+	.unmap		= anon_pipe_buf_unmap,
+	.release	= anon_pipe_buf_release,
+	.can_merge	= 1,	/* the write path may append small writes to such a buffer */
+};
+
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
unsigned long nr_segs, loff_t *ppos)
{
struct inode *inode = filp->f_dentry->d_inode;
+ struct pipe_inode_info *info;
int do_wakeup;
ssize_t ret;
struct iovec *iov = (struct iovec *)_iov;
do_wakeup = 0;
ret = 0;
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
+ info = inode->i_pipe;
for (;;) {
- int size = PIPE_LEN(*inode);
- if (size) {
- char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
- ssize_t chars = PIPE_MAX_RCHUNK(*inode);
+ int bufs = info->nrbufs;
+ if (bufs) {
+ int curbuf = info->curbuf;
+ struct pipe_buffer *buf = info->bufs + curbuf;
+ struct pipe_buf_operations *ops = buf->ops;
+ void *addr;
+ size_t chars = buf->len;
+ int error;
if (chars > total_len)
chars = total_len;
- if (chars > size)
- chars = size;
- if (pipe_iov_copy_to_user(iov, pipebuf, chars)) {
+ addr = ops->map(filp, info, buf);
+ error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
+ ops->unmap(info, buf);
+ if (unlikely(error)) {
if (!ret) ret = -EFAULT;
break;
}
ret += chars;
-
- PIPE_START(*inode) += chars;
- PIPE_START(*inode) &= (PIPE_SIZE - 1);
- PIPE_LEN(*inode) -= chars;
+ buf->offset += chars;
+ buf->len -= chars;
+ if (!buf->len) {
+ buf->ops = NULL;
+ ops->release(info, buf);
+ curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
+ info->curbuf = curbuf;
+ info->nrbufs = --bufs;
+ do_wakeup = 1;
+ }
total_len -= chars;
- do_wakeup = 1;
if (!total_len)
break; /* common path: read succeeded */
}
- if (PIPE_LEN(*inode)) /* test for cyclic buffers */
+ if (bufs) /* More to do? */
continue;
if (!PIPE_WRITERS(*inode))
break;
}
pipe_wait(inode);
}
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode));
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
wake_up_interruptible(PIPE_WAIT(*inode));
unsigned long nr_segs, loff_t *ppos)
{
struct inode *inode = filp->f_dentry->d_inode;
+ struct pipe_inode_info *info;
ssize_t ret;
- size_t min;
int do_wakeup;
struct iovec *iov = (struct iovec *)_iov;
size_t total_len;
+ ssize_t chars;
total_len = iov_length(iov, nr_segs);
/* Null write succeeds. */
do_wakeup = 0;
ret = 0;
- min = total_len;
- if (min > PIPE_BUF)
- min = 1;
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
+ info = inode->i_pipe;
+
+ if (!PIPE_READERS(*inode)) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ goto out;
+ }
+
+ /* We try to merge small writes */
+ chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
+ if (info->nrbufs && chars != 0) {
+ int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
+ struct pipe_buffer *buf = info->bufs + lastbuf;
+ struct pipe_buf_operations *ops = buf->ops;
+ int offset = buf->offset + buf->len;
+ if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+ void *addr = ops->map(filp, info, buf);
+ int error = pipe_iov_copy_from_user(offset + addr, iov, chars);
+ ops->unmap(info, buf);
+ ret = error;
+ do_wakeup = 1;
+ if (error)
+ goto out;
+ buf->len += chars;
+ total_len -= chars;
+ ret = chars;
+ if (!total_len)
+ goto out;
+ }
+ }
+
for (;;) {
- int free;
+ int bufs;
if (!PIPE_READERS(*inode)) {
send_sig(SIGPIPE, current, 0);
if (!ret) ret = -EPIPE;
break;
}
- free = PIPE_FREE(*inode);
- if (free >= min) {
- /* transfer data */
- ssize_t chars = PIPE_MAX_WCHUNK(*inode);
- char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
+ bufs = info->nrbufs;
+ if (bufs < PIPE_BUFFERS) {
+ int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
+ struct pipe_buffer *buf = info->bufs + newbuf;
+ struct page *page = info->tmp_page;
+ int error;
+
+ if (!page) {
+ page = alloc_page(GFP_HIGHUSER);
+ if (unlikely(!page)) {
+ ret = ret ? : -ENOMEM;
+ break;
+ }
+ info->tmp_page = page;
+ }
/* Always wakeup, even if the copy fails. Otherwise
* we lock up (O_NONBLOCK-)readers that sleep due to
* syscall merging.
+ * FIXME! Is this really true?
*/
do_wakeup = 1;
+ chars = PAGE_SIZE;
if (chars > total_len)
chars = total_len;
- if (chars > free)
- chars = free;
- if (pipe_iov_copy_from_user(pipebuf, iov, chars)) {
+ error = pipe_iov_copy_from_user(kmap(page), iov, chars);
+ kunmap(page);
+ if (unlikely(error)) {
if (!ret) ret = -EFAULT;
break;
}
ret += chars;
- PIPE_LEN(*inode) += chars;
+ /* Insert it into the buffer array */
+ buf->page = page;
+ buf->ops = &anon_pipe_buf_ops;
+ buf->offset = 0;
+ buf->len = chars;
+ info->nrbufs = ++bufs;
+ info->tmp_page = NULL;
+
total_len -= chars;
if (!total_len)
break;
}
- if (PIPE_FREE(*inode) && ret) {
- /* handle cyclic data buffers */
- min = 1;
+ if (bufs < PIPE_BUFFERS)
continue;
- }
if (filp->f_flags & O_NONBLOCK) {
if (!ret) ret = -EAGAIN;
break;
pipe_wait(inode);
PIPE_WAITING_WRITERS(*inode)--;
}
- up(PIPE_SEM(*inode));
+out:
+ mutex_unlock(PIPE_MUTEX(*inode));
if (do_wakeup) {
wake_up_interruptible(PIPE_WAIT(*inode));
kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
}
if (ret > 0)
- inode_update_time(inode, filp->f_vfsmnt, 1); /* mtime and ctime */
+ file_update_time(filp);
return ret;
}
pipe_ioctl(struct inode *pino, struct file *filp,
unsigned int cmd, unsigned long arg)
{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct pipe_inode_info *info;
+ int count, buf, nrbufs;
+
switch (cmd) {
case FIONREAD:
- return put_user(PIPE_LEN(*pino), (int __user *)arg);
+ mutex_lock(PIPE_MUTEX(*inode));
+ info = inode->i_pipe;
+ count = 0;
+ buf = info->curbuf;
+ nrbufs = info->nrbufs;
+ while (--nrbufs >= 0) {
+ count += info->bufs[buf].len;
+ buf = (buf+1) & (PIPE_BUFFERS-1);
+ }
+ mutex_unlock(PIPE_MUTEX(*inode));
+ return put_user(count, (int __user *)arg);
default:
return -EINVAL;
}
{
unsigned int mask;
struct inode *inode = filp->f_dentry->d_inode;
+ struct pipe_inode_info *info = inode->i_pipe;
+ int nrbufs;
poll_wait(filp, PIPE_WAIT(*inode), wait);
/* Reading only -- no need for acquiring the semaphore. */
- mask = POLLIN | POLLRDNORM;
- if (PIPE_EMPTY(*inode))
- mask = POLLOUT | POLLWRNORM;
- if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
- mask |= POLLHUP;
- if (!PIPE_READERS(*inode))
- mask |= POLLERR;
+ nrbufs = info->nrbufs;
+ mask = 0;
+ if (filp->f_mode & FMODE_READ) {
+ mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
+ if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
+ mask |= POLLHUP;
+ }
+
+ if (filp->f_mode & FMODE_WRITE) {
+ mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+ /*
+ * Most Unices do not set POLLERR for FIFOs but on Linux they
+ * behave exactly like pipes for poll().
+ */
+ if (!PIPE_READERS(*inode))
+ mask |= POLLERR;
+ }
return mask;
}
-/* FIXME: most Unices do not set POLLERR for fifos */
-#define fifo_poll pipe_poll
-
static int
pipe_release(struct inode *inode, int decr, int decw)
{
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode)); /* Drop decr readers and decw writers from the pipe.
+  * The count updates and the teardown or wakeups that follow all run with
+  * the pipe mutex held; this lock replaces the PIPE_SEM semaphore. */
PIPE_READERS(*inode) -= decr;
PIPE_WRITERS(*inode) -= decw;
if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
- struct pipe_inode_info *info = inode->i_pipe;
- inode->i_pipe = NULL;
- free_page((unsigned long) info->base);
- kfree(info);
+ free_pipe_info(inode); /* no readers or writers remain: release buffers and the pipe info */
} else {
wake_up_interruptible(PIPE_WAIT(*inode));
kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
}
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode)); /* always returns 0 after unlocking */
return 0;
}
struct inode *inode = filp->f_dentry->d_inode;
int retval;
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode));
if (retval < 0)
return retval;
struct inode *inode = filp->f_dentry->d_inode;
int retval;
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode));
if (retval < 0)
return retval;
struct inode *inode = filp->f_dentry->d_inode;
int retval;
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
if (retval >= 0)
retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode));
if (retval < 0)
return retval;
{
/* We could have perhaps used atomic_t, but this and friends
below are the only places. So it doesn't seem worthwhile. */
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode));
PIPE_READERS(*inode)++;
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode));
return 0;
}
static int
pipe_write_open(struct inode *inode, struct file *filp)
{
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode)); /* Open for writing: the writer count is bumped
+  * under the pipe mutex, which replaces the PIPE_SEM semaphore. */
PIPE_WRITERS(*inode)++;
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode)); /* cannot fail: always returns 0 */
return 0;
}
static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
- down(PIPE_SEM(*inode));
+ mutex_lock(PIPE_MUTEX(*inode)); /* Open for read and/or write: each count is bumped
+  * only if the matching FMODE_* bit is set in filp->f_mode, all under the
+  * pipe mutex, which replaces the PIPE_SEM semaphore. */
if (filp->f_mode & FMODE_READ)
PIPE_READERS(*inode)++;
if (filp->f_mode & FMODE_WRITE)
PIPE_WRITERS(*inode)++;
- up(PIPE_SEM(*inode));
+ mutex_unlock(PIPE_MUTEX(*inode)); /* cannot fail: always returns 0 */
return 0;
}
.read = pipe_read,
.readv = pipe_readv,
.write = bad_pipe_w,
- .poll = fifo_poll,
+ .poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_read_open,
.release = pipe_read_release,
.read = bad_pipe_r,
.write = pipe_write,
.writev = pipe_writev,
- .poll = fifo_poll,
+ .poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_write_open,
.release = pipe_write_release,
.readv = pipe_readv,
.write = pipe_write,
.writev = pipe_writev,
- .poll = fifo_poll,
+ .poll = pipe_poll,
.ioctl = pipe_ioctl,
.open = pipe_rdwr_open,
.release = pipe_rdwr_release,
.fasync = pipe_rdwr_fasync,
};
-struct file_operations read_pipe_fops = {
+static struct file_operations read_pipe_fops = {
.llseek = no_llseek,
.read = pipe_read,
.readv = pipe_readv,
.fasync = pipe_read_fasync,
};
-struct file_operations write_pipe_fops = {
+static struct file_operations write_pipe_fops = {
.llseek = no_llseek,
.read = bad_pipe_r,
.write = pipe_write,
.fasync = pipe_write_fasync,
};
-struct file_operations rdwr_pipe_fops = {
+static struct file_operations rdwr_pipe_fops = {
.llseek = no_llseek,
.read = pipe_read,
.readv = pipe_readv,
.fasync = pipe_rdwr_fasync,
};
-struct inode* pipe_new(struct inode* inode)
+void free_pipe_info(struct inode *inode) /* Tear down inode->i_pipe: release every buffer
+ * slot that still has ops, free the cached spare page if there is one, then
+ * free the pipe_inode_info itself.  inode->i_pipe is left NULL. */
{
- unsigned long page;
+ int i;
+ struct pipe_inode_info *info = inode->i_pipe;
- page = __get_free_page(GFP_USER);
- if (!page)
- return NULL;
+ inode->i_pipe = NULL; /* detach first; the rest works through the local info pointer */
+ for (i = 0; i < PIPE_BUFFERS; i++) {
+ struct pipe_buffer *buf = info->bufs + i;
+ if (buf->ops) /* slots without ops are skipped; the read path clears ops when it consumes a buffer */
+ buf->ops->release(info, buf);
+ }
+ if (info->tmp_page) /* spare page; a release callback above may have just stored it */
+ __free_page(info->tmp_page);
+ kfree(info);
+}
+
+struct inode* pipe_new(struct inode* inode) /* Allocate a zeroed pipe_inode_info, attach it
+ * to inode->i_pipe and initialise its wait queue and reader/writer counters.
+ * Returns inode on success, or NULL if the allocation fails.  No data page
+ * is allocated here any more (the __get_free_page() call is removed). */
+{
+ struct pipe_inode_info *info;
- inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
- if (!inode->i_pipe)
+ info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+ if (!info) /* nothing else to undo: fail_page now only returns NULL */
goto fail_page;
+ memset(info, 0, sizeof(*info)); /* zeroing stands in for the explicit 0/NULL field resets removed below */
+ inode->i_pipe = info; /* published only after it has been zeroed */
init_waitqueue_head(PIPE_WAIT(*inode));
- PIPE_BASE(*inode) = (char*) page;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
- PIPE_WAITING_WRITERS(*inode) = 0;
PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
- *PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;
return inode;
fail_page:
- free_page(page);
return NULL;
}
close_f12_inode_i:
put_unused_fd(i);
close_f12_inode:
- free_page((unsigned long) PIPE_BASE(*inode));
- kfree(inode->i_pipe);
- inode->i_pipe = NULL;
+ free_pipe_info(inode);
iput(inode);
close_f12:
put_filp(f2);
return error;
}
-EXPORT_SYMBOL_GPL(do_pipe);
-
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
mntput(pipe_mnt);
}
-module_init(init_pipe_fs)
-module_exit(exit_pipe_fs)
+fs_initcall(init_pipe_fs);
+module_exit(exit_pipe_fs);