Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / ipc / mqueue.c
index 40a8d41..6c0a574 100644 (file)
@@ -2,7 +2,7 @@
  * POSIX message queues filesystem for Linux.
  *
  * Copyright (C) 2003,2004  Krzysztof Benedyczak    (golbi@mat.uni.torun.pl)
- *                          Michal Wronski          (wrona@mat.uni.torun.pl)
+ *                          Michal Wronski          (Michal.Wronski@motorola.com)
  *
  * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
  * Lockless receive & send, fd based notify:
@@ -11,6 +11,7 @@
  * This file is released under the GPL.
  */
 
+#include <linux/capability.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/msg.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
+
 #include <net/sock.h>
 #include "util.h"
 
 #define CTL_MSGSIZEMAX         4
 
 /* default values */
-#define DFLT_QUEUESMAX 64      /* max number of message queues */
-#define DFLT_MSGMAX    40      /* max number of messages in each queue */
+#define DFLT_QUEUESMAX 256     /* max number of message queues */
+#define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 16384  /* max message size */
+#define DFLT_MSGSIZEMAX 8192   /* max message size */
 
-#define NOTIFY_COOKIE_LEN      32
 
 struct ext_wait_queue {                /* queue of sleeping tasks */
        struct task_struct *task;
@@ -67,6 +73,7 @@ struct mqueue_inode_info {
 
        struct sigevent notify;
        pid_t notify_owner;
+       struct user_struct *user;       /* user who created, for accounting */
        struct sock *notify_sock;
        struct sk_buff *notify_cookie;
 
@@ -114,6 +121,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 
                if (S_ISREG(mode)) {
                        struct mqueue_inode_info *info;
+                       struct task_struct *p = current;
+                       struct user_struct *u = p->user;
+                       unsigned long mq_bytes, mq_msg_tblsz;
 
                        inode->i_fop = &mqueue_file_operations;
                        inode->i_size = FILENT_SIZE;
@@ -123,8 +133,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
                        init_waitqueue_head(&info->wait_q);
                        INIT_LIST_HEAD(&info->e_wait_q[0].list);
                        INIT_LIST_HEAD(&info->e_wait_q[1].list);
+                       info->messages = NULL;
                        info->notify_owner = 0;
                        info->qsize = 0;
+                       info->user = NULL;      /* set when all is ok */
                        memset(&info->attr, 0, sizeof(info->attr));
                        info->attr.mq_maxmsg = DFLT_MSGMAX;
                        info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
@@ -132,12 +144,32 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
                                info->attr.mq_maxmsg = attr->mq_maxmsg;
                                info->attr.mq_msgsize = attr->mq_msgsize;
                        }
-                       info->messages = kmalloc(info->attr.mq_maxmsg * sizeof(struct msg_msg *), GFP_KERNEL);
+                       mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+                       mq_bytes = (mq_msg_tblsz +
+                               (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+
+                       spin_lock(&mq_lock);
+                       if (u->mq_bytes + mq_bytes < u->mq_bytes ||
+                           u->mq_bytes + mq_bytes >
+                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
+                           !vx_ipcmsg_avail(p->vx_info, mq_bytes)) {
+                               spin_unlock(&mq_lock);
+                               goto out_inode;
+                       }
+                       u->mq_bytes += mq_bytes;
+                       vx_ipcmsg_add(p->vx_info, u, mq_bytes);
+                       spin_unlock(&mq_lock);
+
+                       info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
                        if (!info->messages) {
-                               make_bad_inode(inode);
-                               iput(inode);
-                               inode = NULL;
+                               spin_lock(&mq_lock);
+                               u->mq_bytes -= mq_bytes;
+                               vx_ipcmsg_sub(p->vx_info, u, mq_bytes);
+                               spin_unlock(&mq_lock);
+                               goto out_inode;
                        }
+                       /* all is ok */
+                       info->user = get_uid(u);
                } else if (S_ISDIR(mode)) {
                        inode->i_nlink++;
                        /* Some things misbehave if size == 0 on a directory */
@@ -147,6 +179,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
                }
        }
        return inode;
+out_inode:
+       make_bad_inode(inode);
+       iput(inode);
+       return NULL;
 }
 
 static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
@@ -205,6 +241,8 @@ static void mqueue_destroy_inode(struct inode *inode)
 static void mqueue_delete_inode(struct inode *inode)
 {
        struct mqueue_inode_info *info;
+       struct user_struct *user;
+       unsigned long mq_bytes;
        int i;
 
        if (S_ISDIR(inode->i_mode)) {
@@ -220,10 +258,19 @@ static void mqueue_delete_inode(struct inode *inode)
 
        clear_inode(inode);
 
-       if (info->messages) {
+       mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
+                  (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+       user = info->user;
+       if (user) {
+               struct vx_info *vxi = lookup_vx_info(user->xid);
+
                spin_lock(&mq_lock);
+               user->mq_bytes -= mq_bytes;
+               vx_ipcmsg_sub(vxi, user, mq_bytes);
                queues_count--;
                spin_unlock(&mq_lock);
+               put_vx_info(vxi);
+               free_uid(user);
        }
 }
 
@@ -534,44 +581,60 @@ static void remove_notification(struct mqueue_inode_info *info)
        info->notify_owner = 0;
 }
 
+static int mq_attr_ok(struct mq_attr *attr)
+{
+       if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
+               return 0;
+       if (capable(CAP_SYS_RESOURCE)) {
+               if (attr->mq_maxmsg > HARD_MSGMAX)
+                       return 0;
+       } else {
+               if (attr->mq_maxmsg > msg_max ||
+                               attr->mq_msgsize > msgsize_max)
+                       return 0;
+       }
+       /* check for overflow */
+       if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
+               return 0;
+       if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
+           (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+           (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
+               return 0;
+       return 1;
+}
+
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct dentry *dir, struct dentry *dentry,
                        int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
-       struct file *filp;
        struct mq_attr attr;
        int ret;
 
-       if (u_attr != NULL) {
+       if (u_attr) {
+               ret = -EFAULT;
                if (copy_from_user(&attr, u_attr, sizeof(attr)))
-                       return ERR_PTR(-EFAULT);
-
-               if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0)
-                       return ERR_PTR(-EINVAL);
-               if (capable(CAP_SYS_RESOURCE)) {
-                       if (attr.mq_maxmsg > HARD_MSGMAX)
-                               return ERR_PTR(-EINVAL);
-               } else {
-                       if (attr.mq_maxmsg > msg_max ||
-                                       attr.mq_msgsize > msgsize_max)
-                               return ERR_PTR(-EINVAL);
-               }
+                       goto out;
+               ret = -EINVAL;
+               if (!mq_attr_ok(&attr))
+                       goto out;
                /* store for use during create */
                dentry->d_fsdata = &attr;
        }
 
+       mode &= ~current->fs->umask;
        ret = vfs_create(dir->d_inode, dentry, mode, NULL);
        dentry->d_fsdata = NULL;
        if (ret)
-               return ERR_PTR(ret);
+               goto out;
 
-       filp = dentry_open(dentry, mqueue_mnt, oflag);
-       if (!IS_ERR(filp))
-               dget(dentry);
+       return dentry_open(dentry, mqueue_mnt, oflag);
 
-       return filp;
+out:
+       dput(dentry);
+       mntput(mqueue_mnt);
+       return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
@@ -579,20 +642,20 @@ static struct file *do_open(struct dentry *dentry, int oflag)
 {
 static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
                                        MAY_READ | MAY_WRITE };
-       struct file *filp;
 
-       if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+       if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
+               dput(dentry);
+               mntput(mqueue_mnt);
                return ERR_PTR(-EINVAL);
+       }
 
-       if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
+       if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+               dput(dentry);
+               mntput(mqueue_mnt);
                return ERR_PTR(-EACCES);
+       }
 
-       filp = dentry_open(dentry, mqueue_mnt, oflag);
-
-       if (!IS_ERR(filp))
-               dget(dentry);
-
-       return filp;
+       return dentry_open(dentry, mqueue_mnt, oflag);
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
@@ -610,7 +673,7 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
        if (fd < 0)
                goto out_putname;
 
-       down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+       mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
        dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
        if (IS_ERR(dentry)) {
                error = PTR_ERR(dentry);
@@ -620,17 +683,20 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 
        if (oflag & O_CREAT) {
                if (dentry->d_inode) {  /* entry already exists */
-                       filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
-                                       do_open(dentry, oflag);
+                       error = -EEXIST;
+                       if (oflag & O_EXCL)
+                               goto out;
+                       filp = do_open(dentry, oflag);
                } else {
                        filp = do_create(mqueue_mnt->mnt_root, dentry,
                                                oflag, mode, u_attr);
                }
-       } else
-               filp = (dentry->d_inode) ? do_open(dentry, oflag) :
-                                       ERR_PTR(-ENOENT);
-
-       dput(dentry);
+       } else {
+               error = -ENOENT;
+               if (!dentry->d_inode)
+                       goto out;
+               filp = do_open(dentry, oflag);
+       }
 
        if (IS_ERR(filp)) {
                error = PTR_ERR(filp);
@@ -641,13 +707,15 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
        fd_install(fd, filp);
        goto out_upsem;
 
-out_putfd:
+out:
+       dput(dentry);
        mntput(mqueue_mnt);
+out_putfd:
        put_unused_fd(fd);
 out_err:
        fd = error;
 out_upsem:
-       up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+       mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
 out_putname:
        putname(name);
        return fd;
@@ -664,7 +732,7 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
        if (IS_ERR(name))
                return PTR_ERR(name);
 
-       down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+       mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
        dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
@@ -680,12 +748,12 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
        if (inode)
                atomic_inc(&inode->i_count);
 
-       err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+       err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
 out_err:
        dput(dentry);
 
 out_unlock:
-       up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+       mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
        putname(name);
        if (inode)
                iput(inode);
@@ -703,7 +771,7 @@ out_unlock:
  * The receiver accepts the message and returns without grabbing the queue
  * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
  * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c fore more details.
+ * ipc/sem.c for more details.
  *
  * The same algorithm is used for senders.
  */
@@ -719,7 +787,7 @@ static inline void pipelined_send(struct mqueue_inode_info *info,
        list_del(&receiver->list);
        receiver->state = STATE_PENDING;
        wake_up_process(receiver->task);
-       wmb();
+       smp_wmb();
        receiver->state = STATE_READY;
 }
 
@@ -738,7 +806,7 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
        list_del(&sender->list);
        sender->state = STATE_PENDING;
        wake_up_process(sender->task);
-       wmb();
+       smp_wmb();
        sender->state = STATE_READY;
 }
 
@@ -781,7 +849,7 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
        /* First try to allocate memory, before doing anything with
         * existing queues. */
        msg_ptr = load_msg(u_msg_ptr, msg_len);
-       if (unlikely(IS_ERR(msg_ptr))) {
+       if (IS_ERR(msg_ptr)) {
                ret = PTR_ERR(msg_ptr);
                goto out_fput;
        }
@@ -928,8 +996,7 @@ asmlinkage long sys_mq_notify(mqd_t mqdes,
                             notification.sigev_notify != SIGEV_THREAD))
                        return -EINVAL;
                if (notification.sigev_notify == SIGEV_SIGNAL &&
-                       (notification.sigev_signo < 0 ||
-                        notification.sigev_signo > _NSIG)) {
+                       !valid_signal(notification.sigev_signo)) {
                        return -EINVAL;
                }
                if (notification.sigev_notify == SIGEV_THREAD) {
@@ -961,7 +1028,8 @@ retry:
                                goto out;
                        }
 
-                       ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
+                       ret = netlink_attachskb(sock, nc, 0,
+                                       MAX_SCHEDULE_TIMEOUT, NULL);
                        if (ret == 1)
                                goto retry;
                        if (ret) {
@@ -1171,11 +1239,8 @@ static int __init init_mqueue_fs(void)
        if (mqueue_inode_cachep == NULL)
                return -ENOMEM;
 
+       /* ignore failures - they are not fatal */
        mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
-       if (!mq_sysctl_table) {
-               error = -ENOMEM;
-               goto out_cache;
-       }
 
        error = register_filesystem(&mqueue_fs_type);
        if (error)
@@ -1195,8 +1260,8 @@ static int __init init_mqueue_fs(void)
 out_filesystem:
        unregister_filesystem(&mqueue_fs_type);
 out_sysctl:
-       unregister_sysctl_table(mq_sysctl_table);
-out_cache:
+       if (mq_sysctl_table)
+               unregister_sysctl_table(mq_sysctl_table);
        if (kmem_cache_destroy(mqueue_inode_cachep)) {
                printk(KERN_INFO
                        "mqueue_inode_cache: not all structures were freed\n");