* POSIX message queues filesystem for Linux.
*
* Copyright (C) 2003,2004 Krzysztof Benedyczak (golbi@mat.uni.torun.pl)
- * Michal Wronski (wrona@mat.uni.torun.pl)
+ * Michal Wronski (michal.wronski@gmail.com)
*
* Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com)
* Lockless receive & send, fd based notify:
* Manfred Spraul (manfred@colorfullife.com)
*
+ * Audit: George Wilson (ltcgcw@us.ibm.com)
+ *
* This file is released under the GPL.
*/
+#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/syscalls.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
+
#include <net/sock.h>
#include "util.h"
#define CTL_MSGSIZEMAX 4
/* default values */
-#define DFLT_QUEUESMAX 64 /* max number of message queues */
-#define DFLT_MSGMAX 40 /* max number of messages in each queue */
+#define DFLT_QUEUESMAX 256 /* max number of message queues */
+#define DFLT_MSGMAX 10 /* max number of messages in each queue */
#define HARD_MSGMAX (131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 16384 /* max message size */
+#define DFLT_MSGSIZEMAX 8192 /* max message size */
-#define NOTIFY_COOKIE_LEN 32
struct ext_wait_queue { /* queue of sleeping tasks */
struct task_struct *task;
struct mq_attr attr;
struct sigevent notify;
- pid_t notify_owner;
+ struct pid* notify_owner;
+ struct user_struct *user; /* user who created, for accounting */
struct sock *notify_sock;
struct sk_buff *notify_cookie;
static void remove_notification(struct mqueue_inode_info *info);
static spinlock_t mq_lock;
-static kmem_cache_t *mqueue_inode_cachep;
+static struct kmem_cache *mqueue_inode_cachep;
static struct vfsmount *mqueue_mnt;
static unsigned int queues_count;
return container_of(inode, struct mqueue_inode_info, vfs_inode);
}
-static struct inode *mqueue_get_inode(struct super_block *sb, int mode)
+static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
+ struct mq_attr *attr)
{
struct inode *inode;
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mtime = inode->i_ctime = inode->i_atime =
CURRENT_TIME;
if (S_ISREG(mode)) {
struct mqueue_inode_info *info;
+ struct task_struct *p = current;
+ struct user_struct *u = p->user;
+ unsigned long mq_bytes, mq_msg_tblsz;
inode->i_fop = &mqueue_file_operations;
inode->i_size = FILENT_SIZE;
init_waitqueue_head(&info->wait_q);
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->notify_owner = 0;
+ info->messages = NULL;
+ info->notify_owner = NULL;
info->qsize = 0;
+ info->user = NULL; /* set when all is ok */
memset(&info->attr, 0, sizeof(info->attr));
info->attr.mq_maxmsg = DFLT_MSGMAX;
info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
- info->messages = kmalloc(DFLT_MSGMAX * sizeof(struct msg_msg *), GFP_KERNEL);
+ if (attr) {
+ info->attr.mq_maxmsg = attr->mq_maxmsg;
+ info->attr.mq_msgsize = attr->mq_msgsize;
+ }
+ mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ mq_bytes = (mq_msg_tblsz +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+ spin_lock(&mq_lock);
+ if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+ u->mq_bytes + mq_bytes >
+ p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
+ !vx_ipcmsg_avail(p->vx_info, mq_bytes)) {
+ spin_unlock(&mq_lock);
+ goto out_inode;
+ }
+ u->mq_bytes += mq_bytes;
+ vx_ipcmsg_add(p->vx_info, u, mq_bytes);
+ spin_unlock(&mq_lock);
+
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
if (!info->messages) {
- make_bad_inode(inode);
- iput(inode);
- inode = NULL;
+ spin_lock(&mq_lock);
+ u->mq_bytes -= mq_bytes;
+ vx_ipcmsg_sub(p->vx_info, u, mq_bytes);
+ spin_unlock(&mq_lock);
+ goto out_inode;
}
+ /* all is ok */
+ info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
- inode->i_nlink++;
+ inc_nlink(inode);
/* Some things misbehave if size == 0 on a directory */
inode->i_size = 2 * DIRENT_SIZE;
inode->i_op = &mqueue_dir_inode_operations;
}
}
return inode;
+out_inode:
+ make_bad_inode(inode);
+ iput(inode);
+ return NULL;
}
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = MQUEUE_MAGIC;
sb->s_op = &mqueue_super_ops;
- inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO);
+ inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
if (!inode)
return -ENOMEM;
return 0;
}
-static struct super_block *mqueue_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data)
+static int mqueue_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, mqueue_fill_super);
+ return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
}
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
{
struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
{
struct mqueue_inode_info *ei;
- ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
+ ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
static void mqueue_delete_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
+ struct user_struct *user;
+ unsigned long mq_bytes;
int i;
if (S_ISDIR(inode->i_mode)) {
clear_inode(inode);
- if (info->messages) {
+ mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
+ (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+ user = info->user;
+ if (user) {
+ struct vx_info *vxi = lookup_vx_info(user->xid);
+
spin_lock(&mq_lock);
+ user->mq_bytes -= mq_bytes;
+ vx_ipcmsg_sub(vxi, user, mq_bytes);
queues_count--;
spin_unlock(&mq_lock);
+ put_vx_info(vxi);
+ free_uid(user);
}
}
int mode, struct nameidata *nd)
{
struct inode *inode;
+ struct mq_attr *attr = dentry->d_fsdata;
int error;
spin_lock(&mq_lock);
queues_count++;
spin_unlock(&mq_lock);
- inode = mqueue_get_inode(dir->i_sb, mode);
+ inode = mqueue_get_inode(dir->i_sb, mode, attr);
if (!inode) {
error = -ENOMEM;
spin_lock(&mq_lock);
dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
dir->i_size -= DIRENT_SIZE;
- inode->i_nlink--;
+ drop_nlink(inode);
dput(dentry);
return 0;
}
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
size_t count, loff_t * off)
{
- struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+ struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
char buffer[FILENT_SIZE];
size_t slen;
loff_t o;
(info->notify_owner &&
info->notify.sigev_notify == SIGEV_SIGNAL) ?
info->notify.sigev_signo : 0,
- info->notify_owner);
+ pid_nr(info->notify_owner));
spin_unlock(&info->lock);
buffer[sizeof(buffer)-1] = '\0';
slen = strlen(buffer)+1;
return -EFAULT;
*off = o + count;
- filp->f_dentry->d_inode->i_atime = filp->f_dentry->d_inode->i_ctime = CURRENT_TIME;
+ filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
return count;
}
-static int mqueue_flush_file(struct file *filp)
+static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
- struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+ struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
spin_lock(&info->lock);
- if (current->tgid == info->notify_owner)
+ if (task_tgid(current) == info->notify_owner)
remove_notification(info);
spin_unlock(&info->lock);
static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{
- struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+ struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
int retval = 0;
poll_wait(filp, &info->wait_q, poll_tab);
sig_i.si_pid = current->tgid;
sig_i.si_uid = current->uid;
- kill_proc_info(info->notify.sigev_signo,
- &sig_i, info->notify_owner);
+ kill_pid_info(info->notify.sigev_signo,
+ &sig_i, info->notify_owner);
break;
case SIGEV_THREAD:
set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
break;
}
/* after notification unregisters process */
- info->notify_owner = 0;
+ put_pid(info->notify_owner);
+ info->notify_owner = NULL;
}
wake_up(&info->wait_q);
}
static void remove_notification(struct mqueue_inode_info *info)
{
- if (info->notify_owner != 0 &&
+ if (info->notify_owner != NULL &&
info->notify.sigev_notify == SIGEV_THREAD) {
set_cookie(info->notify_cookie, NOTIFY_REMOVED);
netlink_sendskb(info->notify_sock, info->notify_cookie, 0);
}
- info->notify_owner = 0;
+ put_pid(info->notify_owner);
+ info->notify_owner = NULL;
+}
+
+static int mq_attr_ok(struct mq_attr *attr)
+{
+ if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
+ return 0;
+ if (capable(CAP_SYS_RESOURCE)) {
+ if (attr->mq_maxmsg > HARD_MSGMAX)
+ return 0;
+ } else {
+ if (attr->mq_maxmsg > msg_max ||
+ attr->mq_msgsize > msgsize_max)
+ return 0;
+ }
+ /* check for overflow */
+ if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
+ return 0;
+ if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
+ (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+ (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
+ return 0;
+ return 1;
}
/*
static struct file *do_create(struct dentry *dir, struct dentry *dentry,
int oflag, mode_t mode, struct mq_attr __user *u_attr)
{
- struct file *filp;
- struct inode *inode;
- struct mqueue_inode_info *info;
- struct msg_msg **msgs = NULL;
struct mq_attr attr;
int ret;
- if (u_attr != NULL) {
+ if (u_attr) {
+ ret = -EFAULT;
if (copy_from_user(&attr, u_attr, sizeof(attr)))
- return ERR_PTR(-EFAULT);
-
- if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0)
- return ERR_PTR(-EINVAL);
- if (capable(CAP_SYS_RESOURCE)) {
- if (attr.mq_maxmsg > HARD_MSGMAX)
- return ERR_PTR(-EINVAL);
- } else {
- if (attr.mq_maxmsg > msg_max ||
- attr.mq_msgsize > msgsize_max)
- return ERR_PTR(-EINVAL);
- }
- msgs = kmalloc(attr.mq_maxmsg * sizeof(*msgs), GFP_KERNEL);
- if (!msgs)
- return ERR_PTR(-ENOMEM);
- } else {
- msgs = NULL;
+ goto out;
+ ret = -EINVAL;
+ if (!mq_attr_ok(&attr))
+ goto out;
+ /* store for use during create */
+ dentry->d_fsdata = &attr;
}
+ mode &= ~current->fs->umask;
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
- if (ret) {
- kfree(msgs);
- return ERR_PTR(ret);
- }
-
- inode = dentry->d_inode;
- info = MQUEUE_I(inode);
-
- if (msgs) {
- info->attr.mq_maxmsg = attr.mq_maxmsg;
- info->attr.mq_msgsize = attr.mq_msgsize;
- kfree(info->messages);
- info->messages = msgs;
- }
+ dentry->d_fsdata = NULL;
+ if (ret)
+ goto out;
- filp = dentry_open(dentry, mqueue_mnt, oflag);
- if (!IS_ERR(filp))
- dget(dentry);
+ return dentry_open(dentry, mqueue_mnt, oflag);
- return filp;
+out:
+ dput(dentry);
+ mntput(mqueue_mnt);
+ return ERR_PTR(ret);
}
/* Opens existing queue */
{
static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };
- struct file *filp;
- if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
+ dput(dentry);
+ mntput(mqueue_mnt);
return ERR_PTR(-EINVAL);
+ }
- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
+ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+ dput(dentry);
+ mntput(mqueue_mnt);
return ERR_PTR(-EACCES);
+ }
- filp = dentry_open(dentry, mqueue_mnt, oflag);
-
- if (!IS_ERR(filp))
- dget(dentry);
-
- return filp;
+ return dentry_open(dentry, mqueue_mnt, oflag);
}
asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
char *name;
int fd, error;
+ error = audit_mq_open(oflag, mode, u_attr);
+ if (error != 0)
+ return error;
+
if (IS_ERR(name = getname(u_name)))
return PTR_ERR(name);
if (fd < 0)
goto out_putname;
- down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
- filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
- do_open(dentry, oflag);
+ error = -EEXIST;
+ if (oflag & O_EXCL)
+ goto out;
+ filp = do_open(dentry, oflag);
} else {
filp = do_create(mqueue_mnt->mnt_root, dentry,
oflag, mode, u_attr);
}
- } else
- filp = (dentry->d_inode) ? do_open(dentry, oflag) :
- ERR_PTR(-ENOENT);
-
- dput(dentry);
+ } else {
+ error = -ENOENT;
+ if (!dentry->d_inode)
+ goto out;
+ filp = do_open(dentry, oflag);
+ }
if (IS_ERR(filp)) {
error = PTR_ERR(filp);
fd_install(fd, filp);
goto out_upsem;
-out_putfd:
+out:
+ dput(dentry);
mntput(mqueue_mnt);
+out_putfd:
put_unused_fd(fd);
out_err:
fd = error;
out_upsem:
- up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
out_putname:
putname(name);
return fd;
if (IS_ERR(name))
return PTR_ERR(name);
- down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
if (inode)
atomic_inc(&inode->i_count);
- err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+ err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
out_err:
dput(dentry);
out_unlock:
- up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
putname(name);
if (inode)
iput(inode);
* The receiver accepts the message and returns without grabbing the queue
* spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
* are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c fore more details.
+ * ipc/sem.c for more details.
*
* The same algorithm is used for senders.
*/
list_del(&receiver->list);
receiver->state = STATE_PENDING;
wake_up_process(receiver->task);
- wmb();
+ smp_wmb();
receiver->state = STATE_READY;
}
list_del(&sender->list);
sender->state = STATE_PENDING;
wake_up_process(sender->task);
- wmb();
+ smp_wmb();
sender->state = STATE_READY;
}
long timeout;
int ret;
+ ret = audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
+ if (ret != 0)
+ return ret;
+
if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
return -EINVAL;
if (unlikely(!filp))
goto out;
- inode = filp->f_dentry->d_inode;
+ inode = filp->f_path.dentry->d_inode;
if (unlikely(filp->f_op != &mqueue_file_operations))
goto out_fput;
info = MQUEUE_I(inode);
/* First try to allocate memory, before doing anything with
* existing queues. */
- msg_ptr = load_msg((void *)u_msg_ptr, msg_len);
- if (unlikely(IS_ERR(msg_ptr))) {
+ msg_ptr = load_msg(u_msg_ptr, msg_len);
+ if (IS_ERR(msg_ptr)) {
ret = PTR_ERR(msg_ptr);
goto out_fput;
}
struct mqueue_inode_info *info;
struct ext_wait_queue wait;
+ ret = audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
+ if (ret != 0)
+ return ret;
+
timeout = prepare_timeout(u_abs_timeout);
ret = -EBADF;
if (unlikely(!filp))
goto out;
- inode = filp->f_dentry->d_inode;
+ inode = filp->f_path.dentry->d_inode;
if (unlikely(filp->f_op != &mqueue_file_operations))
goto out_fput;
info = MQUEUE_I(inode);
struct mqueue_inode_info *info;
struct sk_buff *nc;
+ ret = audit_mq_notify(mqdes, u_notification);
+ if (ret != 0)
+ return ret;
+
nc = NULL;
sock = NULL;
if (u_notification != NULL) {
notification.sigev_notify != SIGEV_THREAD))
return -EINVAL;
if (notification.sigev_notify == SIGEV_SIGNAL &&
- (notification.sigev_signo < 0 ||
- notification.sigev_signo > _NSIG)) {
+ !valid_signal(notification.sigev_signo)) {
return -EINVAL;
}
if (notification.sigev_notify == SIGEV_THREAD) {
goto out;
}
- ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
+ ret = netlink_attachskb(sock, nc, 0,
+ MAX_SCHEDULE_TIMEOUT, NULL);
if (ret == 1)
goto retry;
if (ret) {
if (!filp)
goto out;
- inode = filp->f_dentry->d_inode;
+ inode = filp->f_path.dentry->d_inode;
if (unlikely(filp->f_op != &mqueue_file_operations))
goto out_fput;
info = MQUEUE_I(inode);
ret = 0;
spin_lock(&info->lock);
if (u_notification == NULL) {
- if (info->notify_owner == current->tgid) {
+ if (info->notify_owner == task_tgid(current)) {
remove_notification(info);
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}
- } else if (info->notify_owner != 0) {
+ } else if (info->notify_owner != NULL) {
ret = -EBUSY;
} else {
switch (notification.sigev_notify) {
info->notify.sigev_notify = SIGEV_SIGNAL;
break;
}
- info->notify_owner = current->tgid;
+
+ info->notify_owner = get_pid(task_tgid(current));
inode->i_atime = inode->i_ctime = CURRENT_TIME;
}
spin_unlock(&info->lock);
if (!filp)
goto out;
- inode = filp->f_dentry->d_inode;
+ inode = filp->f_path.dentry->d_inode;
if (unlikely(filp->f_op != &mqueue_file_operations))
goto out_fput;
info = MQUEUE_I(inode);
omqstat = info->attr;
omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
if (u_mqstat) {
+ ret = audit_mq_getsetattr(mqdes, &mqstat);
+ if (ret != 0)
+ goto out;
if (mqstat.mq_flags & O_NONBLOCK)
filp->f_flags |= O_NONBLOCK;
else
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
+ /* ignore failues - they are not fatal */
mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
- if (!mq_sysctl_table) {
- error = -ENOMEM;
- goto out_cache;
- }
error = register_filesystem(&mqueue_fs_type);
if (error)
out_filesystem:
unregister_filesystem(&mqueue_fs_type);
out_sysctl:
- unregister_sysctl_table(mq_sysctl_table);
-out_cache:
- if (kmem_cache_destroy(mqueue_inode_cachep)) {
- printk(KERN_INFO
- "mqueue_inode_cache: not all structures were freed\n");
- }
+ if (mq_sysctl_table)
+ unregister_sysctl_table(mq_sysctl_table);
+ kmem_cache_destroy(mqueue_inode_cachep);
return error;
}