X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=Documentation%2Ffilesystems%2FLocking;h=247d7f619aa2c8cc86e7777004c198428798ff9c;hb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;hp=e0ef1f80f4f31d667ce373e2be179154da141b73;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index e0ef1f80f..247d7f619 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -90,7 +90,7 @@ prototypes: void (*destroy_inode)(struct inode *); void (*read_inode) (struct inode *); void (*dirty_inode) (struct inode *); - void (*write_inode) (struct inode *, int); + int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); @@ -99,11 +99,13 @@ prototypes: int (*sync_fs)(struct super_block *sb, int wait); void (*write_super_lockfs) (struct super_block *); void (*unlockfs) (struct super_block *); - int (*statfs) (struct super_block *, struct kstatfs *); + int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); + ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); locking rules: All may block. @@ -126,22 +128,30 @@ remount_fs: no yes maybe (see below) clear_inode: no umount_begin: yes no no show_options: no (vfsmount->sem) +quota_read: no no no (see below) +quota_write: no no no (see below) ->read_inode() is not a method - it's a callback used in iget(). ->remount_fs() will have the s_umount lock if it's already mounted. When called from get_sb_single, it does NOT have the s_umount lock. +->quota_read() and ->quota_write() functions are both guaranteed to +be the only ones operating on the quota file by the quota code (via +dqio_sem) (unless an admin really wants to screw up something and +writes to quota files with quotas on). For other details about locking +see also dquot_operations section. --------------------------- file_system_type --------------------------- prototypes: - struct super_block *(*get_sb) (struct file_system_type *, int, - const char *, void *); + int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); locking rules: may block BKL get_sb yes yes kill_sb yes yes -->get_sb() returns error or a locked superblock (exclusive on ->s_umount). +->get_sb() returns error or 0 with locked superblock attached to the vfsmount +(exclusive on ->s_umount). ->kill_sb() takes a write-locked superblock, does all shutdown work on it, unlocks and drops the reference. @@ -210,8 +220,12 @@ This may also be done to avoid internal deadlocks, but rarely. If the filesytem is called for sync then it must wait on any in-progress I/O and then start new I/O. -The filesystem should unlock the page synchronously, before returning -to the caller. +The filesystem should unlock the page synchronously, before returning to the +caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE +value. WRITEPAGE_ACTIVATE means that page cannot really be written out +currently, and VM should stop calling ->writepage() on this page for some +time. VM does this by moving page to the head of the active list, hence the +name. Unless the filesystem is going to redirty_page_for_writepage(), unlock the page and return zero, writepage *must* run set_page_writeback() against the page, @@ -276,21 +290,40 @@ foo_get_block(). It's an overkill, since block bitmaps can be protected by internal fs locking and real critical areas are much smaller than the areas filesystems protect now. ---------------------------- file_lock ------------------------------------ +----------------------- file_lock_operations ------------------------------ prototypes: - void (*fl_notify)(struct file_lock *); /* unblock callback */ void (*fl_insert)(struct file_lock *); /* lock insertion callback */ void (*fl_remove)(struct file_lock *); /* lock removal callback */ + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); + + +locking rules: + BKL may block +fl_insert: yes no +fl_remove: yes no +fl_copy_lock: yes no +fl_release_private: yes yes + +----------------------- lock_manager_operations --------------------------- +prototypes: + int (*fl_compare_owner)(struct file_lock *, struct file_lock *); + void (*fl_notify)(struct file_lock *); /* unblock callback */ + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); + void (*fl_break)(struct file_lock *); /* break_lease callback */ locking rules: - BKL may block -fl_notify: yes no -fl_insert: yes no -fl_remove: yes no - Currently only NLM provides instances of this class. None of the + BKL may block +fl_compare_owner: yes no +fl_notify: yes no +fl_copy_lock: yes no +fl_release_private: yes yes +fl_break: yes no + + Currently only NFSD and NLM provide instances of this class. None of the them block. If you have out-of-tree instances - please, show up. Locking in that area will change. - --------------------------- buffer_head ----------------------------------- prototypes: void (*b_end_io)(struct buffer_head *bh, int uptodate); @@ -298,8 +331,8 @@ prototypes: locking rules: called from interrupts. In other words, extreme care is needed here. bh is locked, but that's all warranties we have here. Currently only RAID1, -highmem and fs/buffer.c are providing these. Block devices call this method -upon the IO completion. +highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices +call this method upon the IO completion. --------------------------- block_device_operations ----------------------- prototypes: @@ -331,6 +364,8 @@ prototypes: unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); + long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *); @@ -349,6 +384,8 @@ prototypes: loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + int (*check_flags)(int); + int (*dir_notify)(struct file *, unsigned long); }; locking rules: @@ -362,6 +399,8 @@ aio_write: no readdir: no poll: no ioctl: yes (see below) +unlocked_ioctl: no (see below) +compat_ioctl: no mmap: no open: maybe (see below) flush: no @@ -375,6 +414,8 @@ writev: no sendfile: no sendpage: no get_unmapped_area: no +check_flags: no +dir_notify: no ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -405,6 +446,9 @@ move ->readdir() to inode_operations and use a separate method for directory anything that resembles union-mount we won't have a struct file for all components. And there are other reasons why the current interface is a mess... +->ioctl() on regular files is superceded by the ->unlocked_ioctl() that +doesn't take the BKL. + ->read on directories probably must go away - we should just enforce -EISDIR in sys_read() and friends. @@ -412,23 +456,46 @@ in sys_read() and friends. --------------------------- dquot_operations ------------------------------- prototypes: - void (*initialize) (struct inode *, short); - void (*drop) (struct inode *); - int (*alloc_block) (const struct inode *, unsigned long, char); + int (*initialize) (struct inode *, int); + int (*drop) (struct inode *); + int (*alloc_space) (struct inode *, qsize_t, int); int (*alloc_inode) (const struct inode *, unsigned long); - void (*free_block) (const struct inode *, unsigned long); - void (*free_inode) (const struct inode *, unsigned long); - int (*transfer) (struct dentry *, struct iattr *); - -locking rules: - BKL -initialize: no -drop: no -alloc_block: yes -alloc_inode: yes -free_block: yes -free_inode: yes -transfer: no + int (*free_space) (struct inode *, qsize_t); + int (*free_inode) (const struct inode *, unsigned long); + int (*transfer) (struct inode *, struct iattr *); + int (*write_dquot) (struct dquot *); + int (*acquire_dquot) (struct dquot *); + int (*release_dquot) (struct dquot *); + int (*mark_dirty) (struct dquot *); + int (*write_info) (struct super_block *, int); + +These operations are intended to be more or less wrapping functions that ensure +a proper locking wrt the filesystem and call the generic quota operations. + +What filesystem should expect from the generic quota functions: + + FS recursion Held locks when called +initialize: yes maybe dqonoff_sem +drop: yes - +alloc_space: ->mark_dirty() - +alloc_inode: ->mark_dirty() - +free_space: ->mark_dirty() - +free_inode: ->mark_dirty() - +transfer: yes - +write_dquot: yes dqonoff_sem or dqptr_sem +acquire_dquot: yes dqonoff_sem or dqptr_sem +release_dquot: yes dqonoff_sem or dqptr_sem +mark_dirty: no - +write_info: yes dqonoff_sem + +FS recursion means calling ->quota_read() and ->quota_write() from superblock +operations. + +->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called +only directly by the filesystem and do not call any fs functions only +the ->mark_dirty() operation. + +More details about quota locking can be found in fs/dquot.c. --------------------------- vm_operations_struct ----------------------------- prototypes: