* Copyright (C) 1991, 1992 Linus Torvalds
*/
+#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
+#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
+#include <linux/signal.h>
+#include <linux/rcupdate.h>
+#include <linux/vs_base.h>
#include <linux/vs_limit.h>
#include <asm/poll.h>
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
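+ /* load the current fdtable under file_lock; the table may be replaced when it grows */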
+ fdt = files_fdtable(files);
if (flag)
- FD_SET(fd, files->close_on_exec);
+ FD_SET(fd, fdt->close_on_exec);
else
- FD_CLR(fd, files->close_on_exec);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
}
-static inline int get_close_on_exec(unsigned int fd)
+static int get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
int res;
- spin_lock(&files->file_lock);
- res = FD_ISSET(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
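+ /* a read-only lookup needs no spinlock: the fdtable is published via RCU */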
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ res = FD_ISSET(fd, fdt->close_on_exec);
+ rcu_read_unlock();
return res;
}
-
-/* Expand files. Return <0 on error; 0 nothing done; 1 files expanded,
- * we may have blocked.
- *
- * Should be called with the files->file_lock spinlock held for write.
- */
-static int expand_files(struct files_struct *files, int nr)
-{
- int err, expand = 0;
-#ifdef FDSET_DEBUG
- printk (KERN_ERR "%s %d: nr = %d\n", __FUNCTION__, current->pid, nr);
-#endif
-
- if (nr >= files->max_fdset) {
- expand = 1;
- if ((err = expand_fdset(files, nr)))
- goto out;
- }
- if (nr >= files->max_fds) {
- expand = 1;
- if ((err = expand_fd_array(files, nr)))
- goto out;
- }
- err = expand;
- out:
-#ifdef FDSET_DEBUG
- if (err)
- printk (KERN_ERR "%s %d: return %d\n", __FUNCTION__, current->pid, err);
-#endif
- return err;
-}
-
/*
* locate_fd finds a free file descriptor in the open_fds fdset,
* expanding the fd arrays if necessary. Must be called with the
unsigned int newfd;
unsigned int start;
int error;
+ struct fdtable *fdt;
error = -EINVAL;
- if (orig_start >= current->rlim[RLIMIT_NOFILE].rlim_cur)
+ if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
repeat:
+ fdt = files_fdtable(files);
/*
* Someone might have closed fd's in the range
- * orig_start..files->next_fd
+ * orig_start..fdt->next_fd
*/
start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
+ if (start < fdt->next_fd)
+ start = fdt->next_fd;
newfd = start;
- if (start < files->max_fdset) {
- newfd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset, start);
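+ /* scan the open-fds bitmap for the first free descriptor at or above start */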
+ if (start < fdt->max_fdset) {
+ newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset, start);
}
error = -EMFILE;
- if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
+ if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
if (!vx_files_avail(1))
goto out;
if (error)
goto repeat;
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
-
+ /*
+ * We reacquired files->file_lock, so we are safe as long as we
+ * reacquire the fdtable pointer and use it while holding the
+ * lock; no one can free it during that time.
+ */
+ fdt = files_fdtable(files);
+ if (start <= fdt->next_fd)
+ fdt->next_fd = newfd + 1;
+
error = newfd;
out:
static int dupfd(struct file *file, unsigned int start)
{
struct files_struct * files = current->files;
+ struct fdtable *fdt;
int fd;
spin_lock(&files->file_lock);
fd = locate_fd(files, file, start);
if (fd >= 0) {
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
+ /* locate_fd() may have expanded the fdtable; reload the pointer */
+ fdt = files_fdtable(files);
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
vx_openfd_inc(fd);
fd_install(fd, file);
int err = -EBADF;
struct file * file, *tofree;
struct files_struct * files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
if (!(file = fcheck(oldfd)))
if (newfd == oldfd)
goto out_unlock;
err = -EBADF;
- if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
+ if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_unlock;
get_file(file); /* We are now finished with oldfd */
/* Yes. It's a race. In user space. Nothing sane to do */
err = -EBUSY;
- tofree = files->fd[newfd];
- if (!tofree && FD_ISSET(newfd, files->open_fds))
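+ /* reload the fdtable pointer under file_lock in case the table was reallocated */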
+ fdt = files_fdtable(files);
+ tofree = fdt->fd[newfd];
+ if (!tofree && FD_ISSET(newfd, fdt->open_fds))
goto out_fput;
- files->fd[newfd] = file;
- FD_SET(newfd, files->open_fds);
- FD_CLR(newfd, files->close_on_exec);
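+ /* rcu_assign_pointer() publishes the file so lockless RCU readers of the fd array see it safely */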
+ rcu_assign_pointer(fdt->fd[newfd], file);
+ FD_SET(newfd, fdt->open_fds);
+ FD_CLR(newfd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
- vx_openfd_inc(newfd);
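+ /* only charge the vserver context for a new fd when the slot was previously unused */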
if (tofree)
filp_close(tofree, files);
+ else
+ vx_openfd_inc(newfd); /* fd was unused */
+
err = newfd;
out:
return err;
struct inode * inode = filp->f_dentry->d_inode;
int error = 0;
- /* O_APPEND cannot be cleared if the file is marked as append-only */
- if (!(arg & O_APPEND) && IS_APPEND(inode))
+ /*
+ * O_APPEND cannot be cleared if the file is marked as append-only
+ * and the file is open for write.
+ */
+ if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
return -EPERM;
/* O_NOATIME can only be set by the owner or superuser */
return -EINVAL;
}
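+ /* give the underlying filesystem a chance to reject the new flags */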
+ if (filp->f_op && filp->f_op->check_flags)
+ error = filp->f_op->check_flags(arg);
+ if (error)
+ return error;
+
lock_kernel();
if ((arg ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync) {
f_modown(filp, 0, 0, 0, 1);
}
-EXPORT_SYMBOL(f_delown);
-
-long generic_file_fcntl(int fd, unsigned int cmd,
- unsigned long arg, struct file *filp)
+static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
+ struct file *filp)
{
long err = -EINVAL;
break;
case F_SETLK:
case F_SETLKW:
- err = fcntl_setlk(filp, cmd, (struct flock __user *) arg);
+ err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
break;
case F_GETOWN:
/*
break;
case F_SETSIG:
/* arg == 0 restores default behaviour. */
- if (arg < 0 || arg > _NSIG) {
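+ /* valid_signal() rejects values outside 0.._NSIG (arg is unsigned here) */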
+ if (!valid_signal(arg)) {
break;
}
err = 0;
}
return err;
}
-EXPORT_SYMBOL(generic_file_fcntl);
-
-static long do_fcntl(int fd, unsigned int cmd,
- unsigned long arg, struct file *filp)
-{
- if (filp->f_op && filp->f_op->fcntl)
- return filp->f_op->fcntl(fd, cmd, arg, filp);
- return generic_file_fcntl(fd, cmd, arg, filp);
-}
-asmlinkage long sys_fcntl(int fd, unsigned int cmd, unsigned long arg)
+asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct file *filp;
long err = -EBADF;
break;
case F_SETLK64:
case F_SETLKW64:
- err = fcntl_setlk64(filp, cmd, (struct flock64 __user *) arg);
+ err = fcntl_setlk64(fd, filp, cmd,
+ (struct flock64 __user *) arg);
break;
default:
err = do_fcntl(fd, cmd, arg, filp);
};
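+/*
+ * A task may be signalled when the fown owner matches its uid/euid (or the
+ * owner is root) and the security module does not veto the delivery.
+ */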
static inline int sigio_perm(struct task_struct *p,
- struct fown_struct *fown)
+ struct fown_struct *fown, int sig)
{
- return ((fown->euid == 0) ||
- (fown->euid == p->suid) || (fown->euid == p->uid) ||
- (fown->uid == p->suid) || (fown->uid == p->uid));
+ return (((fown->euid == 0) ||
+ (fown->euid == p->suid) || (fown->euid == p->uid) ||
+ (fown->uid == p->suid) || (fown->uid == p->uid)) &&
+ !security_file_send_sigiotask(p, fown, sig));
}
static void send_sigio_to_task(struct task_struct *p,
int fd,
int reason)
{
- if (!sigio_perm(p, fown))
- return;
-
- if (security_file_send_sigiotask(p, fown, fd, reason))
+ if (!sigio_perm(p, fown, fown->signum))
return;
switch (fown->signum) {
else
si.si_band = band_table[reason - POLL_IN];
si.si_fd = fd;
- if (!send_sig_info(fown->signum, &si, p))
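+ /* deliver the queued siginfo to the whole thread group, not a single thread */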
+ if (!group_send_sig_info(fown->signum, &si, p))
break;
/* fall-through: fall back on the old plain SIGIO signal */
case 0:
- send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
}
}
read_lock(&tasklist_lock);
if (pid > 0) {
- p = find_task_by_pid(pid);
+ p = find_task_by_real_pid(pid);
if (p) {
send_sigio_to_task(p, fown, fd, band);
}
} else {
- struct list_head *l;
- struct pid *pidptr;
- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) {
+ do_each_task_pid(-pid, PIDTYPE_PGID, p) {
send_sigio_to_task(p, fown, fd, band);
- }
+ } while_each_task_pid(-pid, PIDTYPE_PGID, p);
}
read_unlock(&tasklist_lock);
out_unlock_fown:
static void send_sigurg_to_task(struct task_struct *p,
struct fown_struct *fown)
{
- if (sigio_perm(p, fown))
- send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
+ if (sigio_perm(p, fown, SIGURG))
+ group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}
int send_sigurg(struct fown_struct *fown)
read_lock(&tasklist_lock);
if (pid > 0) {
- p = find_task_by_pid(pid);
+ p = find_task_by_real_pid(pid);
if (p) {
send_sigurg_to_task(p, fown);
}
} else {
- struct list_head *l;
- struct pid *pidptr;
- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) {
+ do_each_task_pid(-pid, PIDTYPE_PGID, p) {
send_sigurg_to_task(p, fown);
- }
+ } while_each_task_pid(-pid, PIDTYPE_PGID, p);
}
read_unlock(&tasklist_lock);
out_unlock_fown:
return ret;
}
-static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(fasync_lock);
static kmem_cache_t *fasync_cache;
/*