This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / fs / locks.c
index da59349..47efea9 100644 (file)
@@ -60,7 +60,7 @@
  *
  *  Initial implementation of mandatory locks. SunOS turned out to be
  *  a rotten model, so I implemented the "obvious" semantics.
- *  See 'linux/Documentation/mandatory.txt' for details.
+ *  See 'Documentation/mandatory.txt' for details.
  *  Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
  *
  *  Don't allow mandatory locks on mmap()'ed files. Added simple functions to
@@ -167,6 +167,13 @@ static inline void locks_free_lock(struct file_lock *fl)
        if (!list_empty(&fl->fl_link))
                panic("Attempting to free lock on active lock list");
 
+       if (fl->fl_ops) {
+               if (fl->fl_ops->fl_release_private)
+                       fl->fl_ops->fl_release_private(fl);
+               fl->fl_ops = NULL;
+       }
+       fl->fl_lmops = NULL;
+
        kmem_cache_free(filelock_cache, fl);
 }
 
@@ -177,15 +184,14 @@ void locks_init_lock(struct file_lock *fl)
        init_waitqueue_head(&fl->fl_wait);
        fl->fl_next = NULL;
        fl->fl_fasync = NULL;
-       fl->fl_owner = 0;
+       fl->fl_owner = NULL;
        fl->fl_pid = 0;
        fl->fl_file = NULL;
        fl->fl_flags = 0;
        fl->fl_type = 0;
        fl->fl_start = fl->fl_end = 0;
-       fl->fl_notify = NULL;
-       fl->fl_insert = NULL;
-       fl->fl_remove = NULL;
+       fl->fl_ops = NULL;
+       fl->fl_lmops = NULL;
 }
 
 EXPORT_SYMBOL(locks_init_lock);
@@ -217,10 +223,10 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
        new->fl_type = fl->fl_type;
        new->fl_start = fl->fl_start;
        new->fl_end = fl->fl_end;
-       new->fl_notify = fl->fl_notify;
-       new->fl_insert = fl->fl_insert;
-       new->fl_remove = fl->fl_remove;
-       new->fl_u = fl->fl_u;
+       new->fl_ops = fl->fl_ops;
+       new->fl_lmops = fl->fl_lmops;
+       if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
+               fl->fl_ops->fl_copy_lock(new, fl);
 }
 
 EXPORT_SYMBOL(locks_copy_lock);
@@ -321,9 +327,8 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
        fl->fl_pid = current->tgid;
        fl->fl_file = filp;
        fl->fl_flags = FL_POSIX;
-       fl->fl_notify = NULL;
-       fl->fl_insert = NULL;
-       fl->fl_remove = NULL;
+       fl->fl_ops = NULL;
+       fl->fl_lmops = NULL;
 
        return assign_type(fl, l->l_type);
 }
@@ -361,9 +366,8 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
        fl->fl_pid = current->tgid;
        fl->fl_file = filp;
        fl->fl_flags = FL_POSIX;
-       fl->fl_notify = NULL;
-       fl->fl_insert = NULL;
-       fl->fl_remove = NULL;
+       fl->fl_ops = NULL;
+       fl->fl_lmops = NULL;
 
        switch (l->l_type) {
        case F_RDLCK:
@@ -397,9 +401,8 @@ static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
        }
        fl->fl_start = 0;
        fl->fl_end = OFFSET_MAX;
-       fl->fl_notify = NULL;
-       fl->fl_insert = NULL;
-       fl->fl_remove = NULL;
+       fl->fl_ops = NULL;
+       fl->fl_lmops = NULL;
 
        *flp = fl;
        return 0;
@@ -414,14 +417,15 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 }
 
 /*
- * Check whether two locks have the same owner.  The apparently superfluous
- * check for fl_pid enables us to distinguish between locks set by lockd.
+ * Check whether two locks have the same owner (fl1's lmops hook, else fl_owner).
  */
 static inline int
 posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 {
-       return (fl1->fl_owner == fl2->fl_owner) &&
-               (fl1->fl_pid == fl2->fl_pid);
+       if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
+               return fl2->fl_lmops == fl1->fl_lmops && /* differing lmops never match */
+                       fl1->fl_lmops->fl_compare_owner(fl1, fl2);
+       return fl1->fl_owner == fl2->fl_owner; /* no compare hook: plain fl_owner test */
 }
 
 /* Remove waiter from blocker's block list.
@@ -459,7 +463,8 @@ static void locks_insert_block(struct file_lock *blocker,
        }
        list_add_tail(&waiter->fl_block, &blocker->fl_block);
        waiter->fl_next = blocker;
-       list_add(&waiter->fl_link, &blocked_list);
+       if (IS_POSIX(blocker))
+               list_add(&waiter->fl_link, &blocked_list);
 }
 
 /* Wake up processes blocked waiting for blocker.
@@ -472,8 +477,8 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
                struct file_lock *waiter = list_entry(blocker->fl_block.next,
                                struct file_lock, fl_block);
                __locks_delete_block(waiter);
-               if (waiter->fl_notify)
-                       waiter->fl_notify(waiter);
+               if (waiter->fl_lmops && waiter->fl_lmops->fl_notify)
+                       waiter->fl_lmops->fl_notify(waiter);
                else
                        wake_up(&waiter->fl_wait);
        }
@@ -490,8 +495,8 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
        fl->fl_next = *pos;
        *pos = fl;
 
-       if (fl->fl_insert)
-               fl->fl_insert(fl);
+       if (fl->fl_ops && fl->fl_ops->fl_insert)
+               fl->fl_ops->fl_insert(fl);
 }
 
 /*
@@ -514,8 +519,8 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
                fl->fl_fasync = NULL;
        }
 
-       if (fl->fl_remove)
-               fl->fl_remove(fl);
+       if (fl->fl_ops && fl->fl_ops->fl_remove)
+               fl->fl_ops->fl_remove(fl);
 
        locks_wake_up_blocks(fl);
        locks_free_lock(fl);
@@ -631,24 +636,15 @@ int posix_locks_deadlock(struct file_lock *caller_fl,
                                struct file_lock *block_fl)
 {
        struct list_head *tmp;
-       fl_owner_t caller_owner, blocked_owner;
-       unsigned int     caller_pid, blocked_pid;
-
-       caller_owner = caller_fl->fl_owner;
-       caller_pid = caller_fl->fl_pid;
-       blocked_owner = block_fl->fl_owner;
-       blocked_pid = block_fl->fl_pid;
 
 next_task:
-       if (caller_owner == blocked_owner && caller_pid == blocked_pid)
+       if (posix_same_owner(caller_fl, block_fl))
                return 1;
        list_for_each(tmp, &blocked_list) {
                struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
-               if ((fl->fl_owner == blocked_owner)
-                   && (fl->fl_pid == blocked_pid)) {
+               if (posix_same_owner(fl, block_fl)) {
                        fl = fl->fl_next;
-                       blocked_owner = fl->fl_owner;
-                       blocked_pid = fl->fl_pid;
+                       block_fl = fl;
                        goto next_task;
                }
        }
@@ -911,6 +907,34 @@ int posix_lock_file(struct file *filp, struct file_lock *fl)
        return __posix_lock_file(filp->f_dentry->d_inode, fl);
 }
 
+/**
+ * posix_lock_file_wait - Apply a POSIX-style lock to a file
+ * @filp: The file to apply the lock to
+ * @fl: The lock to be applied
+ *
+ * Add a POSIX style lock to a file; with FL_SLEEP, sleep and retry on -EAGAIN.
+ * We merge adjacent & overlapping locks whenever possible.
+ * POSIX locks are sorted by owner task, then by starting address
+ */
+int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
+{
+       int error;
+       might_sleep ();
+       for (;;) {
+               error = __posix_lock_file(filp->f_dentry->d_inode, fl);
+               if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+                       break;  /* final result, or caller did not ask to sleep */
+               error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+               if (!error)
+                       continue;       /* fl_next was cleared: retry the lock */
+               /* The wait returned an error: stop waiting and report it. */
+               locks_delete_block(fl);
+               break;
+       }
+       return error;
+}
+EXPORT_SYMBOL(posix_lock_file_wait);
+
 /**
  * locks_mandatory_locked - Check for an active lock
  * @inode: the file to check
@@ -1294,6 +1318,33 @@ out_unlock:
        return error;
 }
 
+/**
+ * flock_lock_file_wait - Apply a FLOCK-style lock to a file
+ * @filp: The file to apply the lock to
+ * @fl: The lock to be applied
+ *
+ * Add a FLOCK style lock to a file; with FL_SLEEP, sleep and retry on -EAGAIN.
+ */
+int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
+{
+       int error;
+       might_sleep();
+       for (;;) {
+               error = flock_lock_file(filp, fl);
+               if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+                       break;  /* final result, or caller did not ask to sleep */
+               error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+               if (!error)
+                       continue;       /* fl_next was cleared: retry the lock */
+               /* The wait returned an error: stop waiting and report it. */
+               locks_delete_block(fl);
+               break;
+       }
+       return error;
+}
+
+EXPORT_SYMBOL(flock_lock_file_wait);
+
 /**
  *     sys_flock: - flock() system call.
  *     @fd: the file descriptor to lock.
@@ -1342,17 +1393,12 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
        if (error)
                goto out_free;
 
-       for (;;) {
-               error = flock_lock_file(filp, lock);
-               if ((error != -EAGAIN) || !can_sleep)
-                       break;
-               error = wait_event_interruptible(lock->fl_wait, !lock->fl_next);
-               if (!error)
-                       continue;
-
-               locks_delete_block(lock);
-               break;
-       }
+       if (filp->f_op && filp->f_op->flock)
+               error = filp->f_op->flock(filp,
+                                         (can_sleep) ? F_SETLKW : F_SETLK,
+                                         lock);
+       else
+               error = flock_lock_file_wait(filp, lock);
 
  out_free:
        if (list_empty(&lock->fl_link)) {
@@ -1422,7 +1468,6 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
        error = -EFAULT;
        if (!copy_to_user(l, &flock, sizeof(flock)))
                error = 0;
-  
 out:
        return error;
 }
@@ -1489,8 +1534,7 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 
        if (filp->f_op && filp->f_op->lock != NULL) {
                error = filp->f_op->lock(filp, cmd, file_lock);
-               if (error < 0)
-                       goto out;
+               goto out;
        }
 
        for (;;) {
@@ -1624,8 +1668,7 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 
        if (filp->f_op && filp->f_op->lock != NULL) {
                error = filp->f_op->lock(filp, cmd, file_lock);
-               if (error < 0)
-                       goto out;
+               goto out;
        }
 
        for (;;) {
@@ -1672,10 +1715,12 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
        lock.fl_owner = owner;
        lock.fl_pid = current->tgid;
        lock.fl_file = filp;
+       lock.fl_ops = NULL;
+       lock.fl_lmops = NULL;
 
        if (filp->f_op && filp->f_op->lock != NULL) {
                filp->f_op->lock(filp, F_SETLK, &lock);
-               /* Ignore any error -- we must remove the locks anyway */
+               goto out;
        }
 
        /* Can't use posix_lock_file here; we need to remove it no matter
@@ -1684,13 +1729,16 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
        lock_kernel();
        while (*before != NULL) {
                struct file_lock *fl = *before;
-               if (IS_POSIX(fl) && (fl->fl_owner == owner)) {
+               if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) {
                        locks_delete_lock(before);
                        continue;
                }
                before = &fl->fl_next;
        }
        unlock_kernel();
+out:
+       if (lock.fl_ops && lock.fl_ops->fl_release_private)
+               lock.fl_ops->fl_release_private(&lock);
 }
 
 EXPORT_SYMBOL(locks_remove_posix);
@@ -1707,12 +1755,23 @@ void locks_remove_flock(struct file *filp)
        if (!inode->i_flock)
                return;
 
+       if (filp->f_op && filp->f_op->flock) {
+               struct file_lock fl = { .fl_flags = FL_FLOCK,
+                                       .fl_type = F_UNLCK };
+               filp->f_op->flock(filp, F_SETLKW, &fl);
+       }
+
        lock_kernel();
        before = &inode->i_flock;
 
        while ((fl = *before) != NULL) {
                if (fl->fl_file == filp) {
-                       if (IS_FLOCK(fl)) {
+                       /*
+                        * We might have a POSIX lock that was created at the same time
+                        * the filp was closed for the last time. Just remove that too,
+                        * regardless of ownership, since nobody can own it.
+                        */
+                       if (IS_FLOCK(fl) || IS_POSIX(fl)) {
                                locks_delete_lock(before);
                                continue;
                        }
@@ -1720,6 +1779,7 @@ void locks_remove_flock(struct file *filp)
                                lease_modify(before, F_UNLCK);
                                continue;
                        }
+                       /* What? */
                        BUG();
                }
                before = &fl->fl_next;
@@ -1813,7 +1873,7 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
                               : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
        }
        if (inode) {
-#if WE_CAN_BREAK_LSLK_NOW
+#ifdef WE_CAN_BREAK_LSLK_NOW
                out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
                                inode->i_sb->s_id, inode->i_ino);
 #else
@@ -1979,31 +2039,60 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 
 EXPORT_SYMBOL(lock_may_write);
 
+static inline void __steal_locks(struct file *file, fl_owner_t from)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       struct file_lock *fl = inode->i_flock;
+       /* Give current->files each lock on this inode taken via file by from. */
+       while (fl) {
+               if (fl->fl_file == file && fl->fl_owner == from)
+                       fl->fl_owner = current->files;
+               fl = fl->fl_next;       /* next lock on the i_flock list */
+       }
+}
+
+/* When getting ready for executing a binary, we make sure that current
+ * has a files_struct of its own. Before dropping the old files_struct,
+ * we take over ownership of all locks for all file descriptors we own.
+ * Note that we may accidentally steal a lock for a file that a sibling
+ * has created since the unshare_files() call.
+ */
 void steal_locks(fl_owner_t from)
 {
-       struct list_head *tmp;
+       struct files_struct *files = current->files;
+       int i, j;
 
-       if (from == current->files)
+       if (from == files)      /* already the owner: nothing to move */
                return;
 
        lock_kernel();
-       list_for_each(tmp, &file_lock_list) {
-               struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
-               if (fl->fl_owner == from)
-                       fl->fl_owner = current->files;
+       j = 0;
+       for (;;) {
+               unsigned long set;
+               i = j * __NFDBITS;      /* first fd number covered by bitmap word j */
+               if (i >= files->max_fdset || i >= files->max_fds)
+                       break;  /* walked past the fd set or the fd table */
+               set = files->open_fds->fds_bits[j++];
+               while (set) {
+                       if (set & 1) {  /* bit for fd i is set */
+                               struct file *file = files->fd[i];
+                               if (file)
+                                       __steal_locks(file, from);
+                       }
+                       i++;
+                       set >>= 1;      /* move on to the next fd's bit */
+               }
        }
        unlock_kernel();
 }
-
 EXPORT_SYMBOL(steal_locks);
 
 static int __init filelock_init(void)
 {
        filelock_cache = kmem_cache_create("file_lock_cache",
-                       sizeof(struct file_lock), 0, 0, init_once, NULL);
-       if (!filelock_cache)
-               panic("cannot create file lock slab cache");
+                       sizeof(struct file_lock), 0, SLAB_PANIC, /* panic on failure */
+                       init_once, NULL);
        return 0;
 }
 
-module_init(filelock_init)
+core_initcall(filelock_init);