* POSIX message queues filesystem for Linux.
*
* Copyright (C) 2003,2004 Krzysztof Benedyczak (golbi@mat.uni.torun.pl)
- * Michal Wronski (wrona@mat.uni.torun.pl)
+ * Michal Wronski (Michal.Wronski@motorola.com)
*
* Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com)
* Lockless receive & send, fd based notify:
* This file is released under the GPL.
*/
+#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
+
#include <net/sock.h>
#include "util.h"
#define CTL_MSGSIZEMAX 4
/* default values */
-#define DFLT_QUEUESMAX 64 /* max number of message queues */
-#define DFLT_MSGMAX 40 /* max number of messages in each queue */
+#define DFLT_QUEUESMAX 256 /* max number of message queues */
+#define DFLT_MSGMAX 10 /* max number of messages in each queue */
#define HARD_MSGMAX (131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 16384 /* max message size */
+#define DFLT_MSGSIZEMAX 8192 /* max message size */
-#define NOTIFY_COOKIE_LEN 32
struct ext_wait_queue { /* queue of sleeping tasks */
struct task_struct *task;
struct sigevent notify;
pid_t notify_owner;
+ struct user_struct *user; /* user who created, for accounting */
struct sock *notify_sock;
struct sk_buff *notify_cookie;
if (S_ISREG(mode)) {
struct mqueue_inode_info *info;
+ struct task_struct *p = current;
+ struct user_struct *u = p->user;
+ unsigned long mq_bytes, mq_msg_tblsz;
inode->i_fop = &mqueue_file_operations;
inode->i_size = FILENT_SIZE;
init_waitqueue_head(&info->wait_q);
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
+ info->messages = NULL;
info->notify_owner = 0;
info->qsize = 0;
+ info->user = NULL; /* set when all is ok */
memset(&info->attr, 0, sizeof(info->attr));
info->attr.mq_maxmsg = DFLT_MSGMAX;
info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
info->attr.mq_maxmsg = attr->mq_maxmsg;
info->attr.mq_msgsize = attr->mq_msgsize;
}
- info->messages = kmalloc(info->attr.mq_maxmsg * sizeof(struct msg_msg *), GFP_KERNEL);
+ mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ mq_bytes = (mq_msg_tblsz +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+ spin_lock(&mq_lock);
+ if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+ u->mq_bytes + mq_bytes >
+ p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
+ !vx_ipcmsg_avail(p->vx_info, mq_bytes)) {
+ spin_unlock(&mq_lock);
+ goto out_inode;
+ }
+ u->mq_bytes += mq_bytes;
+ vx_ipcmsg_add(p->vx_info, u, mq_bytes);
+ spin_unlock(&mq_lock);
+
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
if (!info->messages) {
- make_bad_inode(inode);
- iput(inode);
- inode = NULL;
+ spin_lock(&mq_lock);
+ u->mq_bytes -= mq_bytes;
+ vx_ipcmsg_sub(p->vx_info, u, mq_bytes);
+ spin_unlock(&mq_lock);
+ goto out_inode;
}
+ /* all is ok */
+ info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
inode->i_nlink++;
/* Some things misbehave if size == 0 on a directory */
}
}
return inode;
+out_inode:
+ make_bad_inode(inode);
+ iput(inode);
+ return NULL;
}
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
static void mqueue_delete_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
+ struct user_struct *user;
+ unsigned long mq_bytes;
int i;
if (S_ISDIR(inode->i_mode)) {
clear_inode(inode);
- if (info->messages) {
+ mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+ user = info->user;
+ if (user) {
+ struct vx_info *vxi = lookup_vx_info(user->xid);
+
spin_lock(&mq_lock);
+ user->mq_bytes -= mq_bytes;
+ vx_ipcmsg_sub(vxi, user, mq_bytes);
queues_count--;
spin_unlock(&mq_lock);
+ put_vx_info(vxi);
+ free_uid(user);
}
}
info->notify_owner = 0;
}
+/*
+ * Validate the attributes requested for a new message queue.
+ *
+ * @attr: attributes to check (only mq_maxmsg and mq_msgsize are read).
+ *
+ * Returns 1 if the attributes are acceptable, 0 otherwise.
+ *
+ * Both values must be positive.  A caller with CAP_SYS_RESOURCE is only
+ * limited by HARD_MSGMAX on the message count; everyone else is limited by
+ * msg_max and msgsize_max.  Finally the total footprint
+ * (mq_maxmsg * mq_msgsize plus one struct msg_msg pointer per message)
+ * must be representable in an unsigned long.
+ */
+static int mq_attr_ok(struct mq_attr *attr)
+{
+	unsigned long maxmsg, msgsize, data_bytes, table_bytes;
+
+	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
+		return 0;
+	if (capable(CAP_SYS_RESOURCE)) {
+		if (attr->mq_maxmsg > HARD_MSGMAX)
+			return 0;
+	} else {
+		if (attr->mq_maxmsg > msg_max ||
+				attr->mq_msgsize > msgsize_max)
+			return 0;
+	}
+	/*
+	 * Check for overflow.  Both values are known to be positive here, so
+	 * converting them to unsigned long is lossless.  The arithmetic is
+	 * done in unsigned long on purpose: the fields are presumably signed
+	 * long (hence the "<= 0" test above - confirm against struct mq_attr),
+	 * and the division check below admits products up to ULONG_MAX, which
+	 * would overflow a signed multiplication.
+	 */
+	maxmsg = attr->mq_maxmsg;
+	msgsize = attr->mq_msgsize;
+	if (msgsize > ULONG_MAX / maxmsg)
+		return 0;
+	data_bytes = maxmsg * msgsize;
+	table_bytes = maxmsg * sizeof(struct msg_msg *);
+	if (data_bytes + table_bytes < data_bytes)
+		return 0;
+	return 1;
+}
+
/*
* Invoked when creating a new queue via sys_mq_open
*/
static struct file *do_create(struct dentry *dir, struct dentry *dentry,
int oflag, mode_t mode, struct mq_attr __user *u_attr)
{
- struct file *filp;
struct mq_attr attr;
int ret;
- if (u_attr != NULL) {
+ if (u_attr) {
+ ret = -EFAULT; /* error returned via "out" if the copy below fails */
if (copy_from_user(&attr, u_attr, sizeof(attr)))
- return ERR_PTR(-EFAULT);
-
- if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0)
- return ERR_PTR(-EINVAL);
- if (capable(CAP_SYS_RESOURCE)) {
- if (attr.mq_maxmsg > HARD_MSGMAX)
- return ERR_PTR(-EINVAL);
- } else {
- if (attr.mq_maxmsg > msg_max ||
- attr.mq_msgsize > msgsize_max)
- return ERR_PTR(-EINVAL);
- }
+ goto out;
+ ret = -EINVAL; /* error returned via "out" if mq_attr_ok() rejects attr */
+ if (!mq_attr_ok(&attr))
+ goto out;
/* store for use during create */
dentry->d_fsdata = &attr;
}
+ mode &= ~current->fs->umask; /* clear the bits set in the current task's umask */
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
dentry->d_fsdata = NULL;
if (ret)
- return ERR_PTR(ret);
+ goto out;
- filp = dentry_open(dentry, mqueue_mnt, oflag);
- if (!IS_ERR(filp))
- dget(dentry);
+ return dentry_open(dentry, mqueue_mnt, oflag);
- return filp;
+out: /* failure path: dput() the dentry, mntput() mqueue_mnt, return ret via ERR_PTR() */
+ dput(dentry);
+ mntput(mqueue_mnt);
+ return ERR_PTR(ret);
}
/* Opens existing queue */
{
static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };
- struct file *filp;
- if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
+ dput(dentry);
+ mntput(mqueue_mnt);
return ERR_PTR(-EINVAL);
+ }
- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
+ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+ dput(dentry);
+ mntput(mqueue_mnt);
return ERR_PTR(-EACCES);
+ }
- filp = dentry_open(dentry, mqueue_mnt, oflag);
-
- if (!IS_ERR(filp))
- dget(dentry);
-
- return filp;
+ return dentry_open(dentry, mqueue_mnt, oflag);
}
asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
if (fd < 0)
goto out_putname;
- down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
- filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
- do_open(dentry, oflag);
+ error = -EEXIST;
+ if (oflag & O_EXCL)
+ goto out;
+ filp = do_open(dentry, oflag);
} else {
filp = do_create(mqueue_mnt->mnt_root, dentry,
oflag, mode, u_attr);
}
- } else
- filp = (dentry->d_inode) ? do_open(dentry, oflag) :
- ERR_PTR(-ENOENT);
-
- dput(dentry);
+ } else {
+ error = -ENOENT;
+ if (!dentry->d_inode)
+ goto out;
+ filp = do_open(dentry, oflag);
+ }
if (IS_ERR(filp)) {
error = PTR_ERR(filp);
fd_install(fd, filp);
goto out_upsem;
-out_putfd:
+out:
+ dput(dentry);
mntput(mqueue_mnt);
+out_putfd:
put_unused_fd(fd);
out_err:
fd = error;
out_upsem:
- up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
out_putname:
putname(name);
return fd;
if (IS_ERR(name))
return PTR_ERR(name);
- down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
if (inode)
atomic_inc(&inode->i_count);
- err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+ err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
out_err:
dput(dentry);
out_unlock:
- up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
putname(name);
if (inode)
iput(inode);
* The receiver accepts the message and returns without grabbing the queue
* spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
* are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c fore more details.
+ * ipc/sem.c for more details.
*
* The same algorithm is used for senders.
*/
list_del(&receiver->list);
receiver->state = STATE_PENDING;
wake_up_process(receiver->task);
- wmb();
+ smp_wmb();
receiver->state = STATE_READY;
}
list_del(&sender->list);
sender->state = STATE_PENDING;
wake_up_process(sender->task);
- wmb();
+ smp_wmb();
sender->state = STATE_READY;
}
/* First try to allocate memory, before doing anything with
* existing queues. */
msg_ptr = load_msg(u_msg_ptr, msg_len);
- if (unlikely(IS_ERR(msg_ptr))) {
+ if (IS_ERR(msg_ptr)) {
ret = PTR_ERR(msg_ptr);
goto out_fput;
}
notification.sigev_notify != SIGEV_THREAD))
return -EINVAL;
if (notification.sigev_notify == SIGEV_SIGNAL &&
- (notification.sigev_signo < 0 ||
- notification.sigev_signo > _NSIG)) {
+ !valid_signal(notification.sigev_signo)) {
return -EINVAL;
}
if (notification.sigev_notify == SIGEV_THREAD) {
goto out;
}
- ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
+ ret = netlink_attachskb(sock, nc, 0,
+ MAX_SCHEDULE_TIMEOUT, NULL);
if (ret == 1)
goto retry;
if (ret) {
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
+ /* ignore failures - they are not fatal */
mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
- if (!mq_sysctl_table) {
- error = -ENOMEM;
- goto out_cache;
- }
error = register_filesystem(&mqueue_fs_type);
if (error)
out_filesystem:
unregister_filesystem(&mqueue_fs_type);
out_sysctl:
- unregister_sysctl_table(mq_sysctl_table);
-out_cache:
+ if (mq_sysctl_table)
+ unregister_sysctl_table(mq_sysctl_table);
if (kmem_cache_destroy(mqueue_inode_cachep)) {
printk(KERN_INFO
"mqueue_inode_cache: not all structures were freed\n");