Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...

[linux-2.6.git] / Documentation / filesystems / Locking
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking

index e0ef1f8..247d7f6 100644 (file)
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -90,7 +90,7 @@ prototypes:
         void (*destroy_inode)(struct inode *);
         void (*read_inode) (struct inode *);
         void (*dirty_inode) (struct inode *);
-       void (*write_inode) (struct inode *, int);
+       int (*write_inode) (struct inode *, int);
         void (*put_inode) (struct inode *);
         void (*drop_inode) (struct inode *);
         void (*delete_inode) (struct inode *);
@@ -99,11 +99,13 @@ prototypes:
         int (*sync_fs)(struct super_block *sb, int wait);
         void (*write_super_lockfs) (struct super_block *);
         void (*unlockfs) (struct super_block *);
-       int (*statfs) (struct super_block *, struct kstatfs *);
+       int (*statfs) (struct dentry *, struct kstatfs *);
         int (*remount_fs) (struct super_block *, int *, char *);
         void (*clear_inode) (struct inode *);
         void (*umount_begin) (struct super_block *);
         int (*show_options)(struct seq_file *, struct vfsmount *);
+       ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+       ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
  
  locking rules:
         All may block.
@@ -126,22 +128,30 @@ remount_fs:               no      yes     maybe           (see below)
  clear_inode:           no
  umount_begin:          yes     no      no
  show_options:          no                              (vfsmount->sem)
+quota_read:            no      no      no              (see below)
+quota_write:           no      no      no              (see below)
  
  ->read_inode() is not a method - it's a callback used in iget().
  ->remount_fs() will have the s_umount lock if it's already mounted.
  When called from get_sb_single, it does NOT have the s_umount lock.
+->quota_read() and ->quota_write() functions are both guaranteed to
+be the only ones operating on the quota file by the quota code (via
+dqio_sem) (unless an admin really wants to screw up something and
+writes to quota files with quotas on). For other details about locking
+see also dquot_operations section.
  
  --------------------------- file_system_type ---------------------------
  prototypes:
-       struct super_block *(*get_sb) (struct file_system_type *, int,
-                       const char *, void *);
+       int (*get_sb) (struct file_system_type *, int,
+                      const char *, void *, struct vfsmount *);
         void (*kill_sb) (struct super_block *);
  locking rules:
                 may block       BKL
  get_sb         yes             yes
  kill_sb                yes             yes
  
-->get_sb() returns error or a locked superblock (exclusive on ->s_umount).
+->get_sb() returns error or 0 with locked superblock attached to the vfsmount
+(exclusive on ->s_umount).
  ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
  unlocks and drops the reference.
  
@@ -210,8 +220,12 @@ This may also be done to avoid internal deadlocks, but rarely.
  If the filesystem is called for sync then it must wait on any
  in-progress I/O and then start new I/O.
  
-The filesystem should unlock the page synchronously, before returning
-to the caller.
+The filesystem should unlock the page synchronously, before returning to the
+caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
+value. WRITEPAGE_ACTIVATE means that page cannot really be written out
+currently, and VM should stop calling ->writepage() on this page for some
+time. VM does this by moving page to the head of the active list, hence the
+name.
  
  Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
  and return zero, writepage *must* run set_page_writeback() against the page,
@@ -276,21 +290,40 @@ foo_get_block(). It's an overkill, since block bitmaps can be protected by
  internal fs locking and real critical areas are much smaller than the areas
  filesystems protect now.
  
---------------------------- file_lock ------------------------------------
+----------------------- file_lock_operations ------------------------------
  prototypes:
-       void (*fl_notify)(struct file_lock *);  /* unblock callback */
         void (*fl_insert)(struct file_lock *);  /* lock insertion callback */
         void (*fl_remove)(struct file_lock *);  /* lock removal callback */
+       void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+       void (*fl_release_private)(struct file_lock *);
+
+
+locking rules:
+                       BKL     may block
+fl_insert:             yes     no
+fl_remove:             yes     no
+fl_copy_lock:          yes     no
+fl_release_private:    yes     yes
+
+----------------------- lock_manager_operations ---------------------------
+prototypes:
+       int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
+       void (*fl_notify)(struct file_lock *);  /* unblock callback */
+       void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+       void (*fl_release_private)(struct file_lock *);
+       void (*fl_break)(struct file_lock *); /* break_lease callback */
  
  locking rules:
-               BKL     may block
-fl_notify:     yes     no
-fl_insert:     yes     no
-fl_remove:     yes     no
-       Currently only NLM provides instances of this class. None of the
+                       BKL     may block
+fl_compare_owner:      yes     no
+fl_notify:             yes     no
+fl_copy_lock:          yes     no
+fl_release_private:    yes     yes
+fl_break:              yes     no
+
+       Currently only NFSD and NLM provide instances of this class. None of
  them block. If you have out-of-tree instances - please, show up. Locking
  in that area will change.
-
  --------------------------- buffer_head -----------------------------------
  prototypes:
         void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -298,8 +331,8 @@ prototypes:
  locking rules:
         called from interrupts. In other words, extreme care is needed here.
  bh is locked, but that is the only guarantee we have here. Currently only RAID1,
-highmem and fs/buffer.c are providing these. Block devices call this method
-upon the IO completion.
+highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
+call this method upon the IO completion.
  
  --------------------------- block_device_operations -----------------------
  prototypes:
@@ -331,6 +364,8 @@ prototypes:
         unsigned int (*poll) (struct file *, struct poll_table_struct *);
         int (*ioctl) (struct inode *, struct file *, unsigned int,
                         unsigned long);
+       long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+       long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
         int (*mmap) (struct file *, struct vm_area_struct *);
         int (*open) (struct inode *, struct file *);
         int (*flush) (struct file *);
@@ -349,6 +384,8 @@ prototypes:
                         loff_t *, int);
         unsigned long (*get_unmapped_area)(struct file *, unsigned long,
                         unsigned long, unsigned long, unsigned long);
+       int (*check_flags)(int);
+       int (*dir_notify)(struct file *, unsigned long);
  };
  
  locking rules:
@@ -362,6 +399,8 @@ aio_write:          no
  readdir:               no
  poll:                  no
  ioctl:                 yes     (see below)
+unlocked_ioctl:                no      (see below)
+compat_ioctl:          no
  mmap:                  no
  open:                  maybe   (see below)
  flush:                 no
@@ -375,6 +414,8 @@ writev:                     no
  sendfile:              no
  sendpage:              no
  get_unmapped_area:     no
+check_flags:           no
+dir_notify:            no
  
  ->llseek() locking has moved from llseek to the individual llseek
  implementations.  If your fs is not using generic_file_llseek, you
@@ -405,6 +446,9 @@ move ->readdir() to inode_operations and use a separate method for directory
  anything that resembles union-mount we won't have a struct file for all
  components. And there are other reasons why the current interface is a mess...
  
+->ioctl() on regular files is superseded by the ->unlocked_ioctl() that
+doesn't take the BKL.
+
  ->read on directories probably must go away - we should just enforce -EISDIR
  in sys_read() and friends.
  
@@ -412,23 +456,46 @@ in sys_read() and friends.
  
  --------------------------- dquot_operations -------------------------------
  prototypes:
-       void (*initialize) (struct inode *, short);
-       void (*drop) (struct inode *);
-       int (*alloc_block) (const struct inode *, unsigned long, char);
+       int (*initialize) (struct inode *, int);
+       int (*drop) (struct inode *);
+       int (*alloc_space) (struct inode *, qsize_t, int);
         int (*alloc_inode) (const struct inode *, unsigned long);
-       void (*free_block) (const struct inode *, unsigned long);
-       void (*free_inode) (const struct inode *, unsigned long);
-       int (*transfer) (struct dentry *, struct iattr *);
-
-locking rules:
-               BKL
-initialize:    no
-drop:          no
-alloc_block:   yes
-alloc_inode:   yes
-free_block:    yes
-free_inode:    yes
-transfer:      no
+       int (*free_space) (struct inode *, qsize_t);
+       int (*free_inode) (const struct inode *, unsigned long);
+       int (*transfer) (struct inode *, struct iattr *);
+       int (*write_dquot) (struct dquot *);
+       int (*acquire_dquot) (struct dquot *);
+       int (*release_dquot) (struct dquot *);
+       int (*mark_dirty) (struct dquot *);
+       int (*write_info) (struct super_block *, int);
+
+These operations are intended to be more or less wrapping functions that ensure
+a proper locking wrt the filesystem and call the generic quota operations.
+
+What filesystem should expect from the generic quota functions:
+
+               FS recursion    Held locks when called
+initialize:    yes             maybe dqonoff_sem
+drop:          yes             -
+alloc_space:   ->mark_dirty()  -
+alloc_inode:   ->mark_dirty()  -
+free_space:    ->mark_dirty()  -
+free_inode:    ->mark_dirty()  -
+transfer:      yes             -
+write_dquot:   yes             dqonoff_sem or dqptr_sem
+acquire_dquot: yes             dqonoff_sem or dqptr_sem
+release_dquot: yes             dqonoff_sem or dqptr_sem
+mark_dirty:    no              -
+write_info:    yes             dqonoff_sem
+
+FS recursion means calling ->quota_read() and ->quota_write() from superblock
+operations.
+
+->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called
+only directly by the filesystem and do not call any fs functions other than
+the ->mark_dirty() operation.
+
+More details about quota locking can be found in fs/dquot.c.
  
  --------------------------- vm_operations_struct -----------------------------
  prototypes: