X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fread_write.c;fp=fs%2Fread_write.c;h=6f899411029da8b5b7ae2494d0c0432440a36f96;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=3f7a1a62165f626e984de4849ce053cc8a824f65;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/read_write.c b/fs/read_write.c index 3f7a1a621..6f8994110 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,13 +15,15 @@ #include #include #include +#include "read_write.h" #include #include -struct file_operations generic_ro_fops = { +const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, - .read = generic_file_read, + .read = do_sync_read, + .aio_read = generic_file_aio_read, .mmap = generic_file_readonly_mmap, .sendfile = generic_file_sendfile, }; @@ -62,13 +64,13 @@ loff_t remote_llseek(struct file *file, loff_t offset, int origin) lock_kernel(); switch (origin) { case 2: - offset += i_size_read(file->f_dentry->d_inode); + offset += i_size_read(file->f_path.dentry->d_inode); break; case 1: offset += file->f_pos; } retval = -EINVAL; - if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { + if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) { if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; @@ -93,7 +95,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) lock_kernel(); switch (origin) { case 2: - offset += i_size_read(file->f_dentry->d_inode); + offset += i_size_read(file->f_path.dentry->d_inode); break; case 1: offset += file->f_pos; @@ -201,8 +203,8 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) goto Einval; - inode = file->f_dentry->d_inode; - if (inode->i_flock && MANDATORY_LOCK(inode)) { + inode = file->f_path.dentry->d_inode; + if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { int retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); @@ -227,14 +229,20 @@ static void wait_on_retry_sync_kiocb(struct kiocb *iocb) ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -265,7 +273,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) else ret = do_sync_read(file, buf, count, pos); if (ret > 0) { - fsnotify_access(file->f_dentry); + fsnotify_access(file->f_path.dentry); current->rchar += ret; } current->syscr++; @@ -279,14 +287,20 @@ EXPORT_SYMBOL(vfs_read); ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -317,7 +331,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ else ret = do_sync_write(file, buf, count, pos); if (ret > 0) { - fsnotify_modify(file->f_dentry); + fsnotify_modify(file->f_path.dentry); current->wchar += ret; } current->syscw++; @@ -436,80 +450,155 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) return seg; } -EXPORT_SYMBOL(iov_shorten); +ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) +{ + struct kiocb kiocb; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_left = len; + kiocb.ki_nbytes = len; + + for (;;) { + ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; + wait_on_retry_sync_kiocb(&kiocb); + } + + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +/* Do it by hand, with file-ops */ +ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, + unsigned long nr_segs, loff_t *ppos, io_fn_t fn) +{ + struct iovec *vector = iov; + ssize_t ret = 0; + + while (nr_segs > 0) { + void __user *base; + size_t len; + ssize_t nr; + + base = vector->iov_base; + len = vector->iov_len; + vector++; + nr_segs--; + + nr = fn(filp, base, len, ppos); + + if (nr < 0) { + if (!ret) + ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + + return ret; +} /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer) + { + unsigned long seg; + ssize_t ret; + struct iovec *iov = fast_pointer; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) { + ret = 0; + goto out; + } + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (nr_segs > UIO_MAXIOV) { + ret = -EINVAL; + goto out; + } + if (nr_segs > fast_segs) { + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) { + ret = -ENOMEM; + goto out; + } + } + if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { + ret = -EFAULT; + goto out; + } + + /* + * According to the Single Unix Specification we should return EINVAL + * if an element length is < 0 when cast to ssize_t or if the + * total length would overflow the ssize_t return value of the + * system call. + */ + ret = 0; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; + + /* see if we we're about to use an invalid len or if + * it's about to overflow ssize_t */ + if (len < 0 || (ret + len < ret)) { + ret = -EINVAL; + goto out; + } + if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { + ret = -EFAULT; + goto out; + } + + ret += len; + } +out: + *ret_pointer = iov; + return ret; +} + static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, unsigned long nr_segs, loff_t *pos) { - typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); - typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *vector; + struct iovec *iov = iovstack; ssize_t ret; - int seg; io_fn_t fn; iov_fn_t fnv; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - ret = 0; - if (nr_segs == 0) - goto out; - - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ - ret = -EINVAL; - if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) - goto out; - if (!file->f_op) - goto out; - if (nr_segs > UIO_FASTIOV) { - ret = -ENOMEM; - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; - } - ret = -EFAULT; - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) + if (!file->f_op) { + ret = -EINVAL; goto out; + } - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; - - if (len < 0) /* size_t not fitting an ssize_t .. */ - goto out; - if (unlikely(!access_ok(vrfy_dir(type), buf, len))) - goto Efault; - tot_len += len; - if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ - goto out; - } - if (tot_len == 0) { - ret = 0; + ret = rw_copy_check_uvector(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), iovstack, &iov); + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -520,52 +609,28 @@ static ssize_t do_readv_writev(int type, struct file *file, fnv = NULL; if (type == READ) { fn = file->f_op->read; - fnv = file->f_op->readv; + fnv = file->f_op->aio_read; } else { fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - ret = fnv(file, iov, nr_segs, pos); - goto out; + fnv = file->f_op->aio_write; } - /* Do it by hand, with file-ops */ - ret = 0; - vector = iov; - while (nr_segs > 0) { - void __user * base; - size_t len; - ssize_t nr; - - base = vector->iov_base; - len = vector->iov_len; - vector++; - nr_segs--; - - nr = fn(file, base, len, pos); + if (fnv) + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, + pos, fnv); + else + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); - if (nr < 0) { - if (!ret) ret = nr; - break; - } - ret += nr; - if (nr != len) - break; - } out: if (iov != iovstack) kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) - fsnotify_access(file->f_dentry); + fsnotify_access(file->f_path.dentry); else - fsnotify_modify(file->f_dentry); + fsnotify_modify(file->f_path.dentry); } return ret; -Efault: - ret = -EFAULT; - goto out; } ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, @@ -573,7 +638,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -586,7 +651,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -636,12 +701,77 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) return ret; } +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos, + size_t count, loff_t max) +{ + struct inode * in_inode, * out_inode; + loff_t pos; + ssize_t ret; + + /* verify in_file */ + in_inode = in_file->f_path.dentry->d_inode; + if (!in_inode) + return -EINVAL; + if (!in_file->f_op || !in_file->f_op->sendfile) + return -EINVAL; + + if (!ppos) + ppos = &in_file->f_pos; + else + if (!(in_file->f_mode & FMODE_PREAD)) + return -ESPIPE; + + ret = rw_verify_area(READ, in_file, ppos, count); + if (ret < 0) + return ret; + count = ret; + + /* verify out_file */ + out_inode = out_file->f_path.dentry->d_inode; + if (!out_inode) + return -EINVAL; + if (!out_file->f_op || !out_file->f_op->sendpage) + return -EINVAL; + + ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); + if (ret < 0) + return ret; + count = ret; + + ret = security_file_permission (out_file, MAY_WRITE); + if (ret) + return ret; + + if (!max) + max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); + + pos = *ppos; + if (unlikely(pos < 0)) + return -EINVAL; + if (unlikely(pos + count > max)) { + if (pos >= max) + return -EOVERFLOW; + count = max - pos; + } + + ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + + if (ret > 0) { + current->rchar += ret; + current->wchar += ret; + } + + if (*ppos > max) + return -EOVERFLOW; + return ret; +} + +EXPORT_SYMBOL(vfs_sendfile); + static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; - loff_t pos; ssize_t retval; int fput_needed_in, fput_needed_out; @@ -654,22 +784,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; - in_inode = in_file->f_dentry->d_inode; - if (!in_inode) - goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) - goto fput_in; - retval = -ESPIPE; - if (!ppos) - ppos = &in_file->f_pos; - else - if (!(in_file->f_mode & FMODE_PREAD)) - goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); - if (retval < 0) - goto fput_in; - count = retval; retval = security_file_permission (in_file, MAY_READ); if (retval) @@ -684,45 +798,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, goto fput_in; if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; - retval = -EINVAL; - if (!out_file->f_op || !out_file->f_op->sendpage) - goto fput_out; - out_inode = out_file->f_dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); - if (retval < 0) - goto fput_out; - count = retval; - - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - - if (!max) - max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); - - pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; - if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; - if (pos >= max) - goto fput_out; - count = max - pos; - } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + retval = vfs_sendfile(out_file, in_file, ppos, count, max); - if (retval > 0) { - current->rchar += retval; - current->wchar += retval; - } current->syscr++; current->syscw++; - if (*ppos > max) - retval = -EOVERFLOW; - fput_out: fput_light(out_file, fput_needed_out); fput_in: