X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=Documentation%2Ffilesystems%2FLocking;h=2c9ce27b189dea1e3ffe3121d2b6c7559f323cc6;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=83e6a8f83ce85a6da75dee59454e4207cd47b43c;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 83e6a8f83..2c9ce27b1 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -90,7 +90,7 @@ prototypes: void (*destroy_inode)(struct inode *); void (*read_inode) (struct inode *); void (*dirty_inode) (struct inode *); - void (*write_inode) (struct inode *, int); + int (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); @@ -203,20 +203,34 @@ currently-in-progress I/O. If the filesystem is not called for "sync" and it determines that it would need to block against in-progress I/O to be able to start new I/O -against the page the filesystem shoud redirty the page (usually with -__set_page_dirty_nobuffers()), then unlock the page and return zero. +against the page the filesystem should redirty the page with +redirty_page_for_writepage(), then unlock the page and return zero. This may also be done to avoid internal deadlocks, but rarely. If the filesytem is called for sync then it must wait on any in-progress I/O and then start new I/O. The filesystem should unlock the page synchronously, before returning -to the caller. If the page has write I/O underway against it, -writepage() should run SetPageWriteback() against the page prior to -unlocking it. The write I/O completion handler should run -end_page_writeback() against the page. - -That is: after 2.5.12, pages which are under writeout are *not* locked. +to the caller. + +Unless the filesystem is going to redirty_page_for_writepage(), unlock the page +and return zero, writepage *must* run set_page_writeback() against the page, +followed by unlocking it. Once set_page_writeback() has been run against the +page, write I/O can be submitted and the write I/O completion handler must run +end_page_writeback() once the I/O is complete. If no I/O is submitted, the +filesystem must run end_page_writeback() against the page before returning from +writepage. + +That is: after 2.5.12, pages which are under writeout are *not* locked. Note, +if the filesystem needs the page to be locked during writeout, that is ok, too, +the page is allowed to be unlocked at any point in time between the calls to +set_page_writeback() and end_page_writeback(). + +Note, failure to run either redirty_page_for_writepage() or the combination of +set_page_writeback()/end_page_writeback() on a page submitted to writepage +will leave the page itself marked clean but it will be tagged as dirty in the +radix tree. This incoherency can lead to all sorts of hard-to-debug problems +in the filesystem like having dirty inodes at umount and losing written data. ->sync_page() locking rules are not well-defined - usually it is called with lock on page, but that is not guaranteed. Considering the currently @@ -262,21 +276,40 @@ foo_get_block(). It's an overkill, since block bitmaps can be protected by internal fs locking and real critical areas are much smaller than the areas filesystems protect now. ---------------------------- file_lock ------------------------------------ +----------------------- file_lock_operations ------------------------------ prototypes: - void (*fl_notify)(struct file_lock *); /* unblock callback */ void (*fl_insert)(struct file_lock *); /* lock insertion callback */ void (*fl_remove)(struct file_lock *); /* lock removal callback */ + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); + + +locking rules: + BKL may block +fl_insert: yes no +fl_remove: yes no +fl_copy_lock: yes no +fl_release_private: yes yes + +----------------------- lock_manager_operations --------------------------- +prototypes: + int (*fl_compare_owner)(struct file_lock *, struct file_lock *); + void (*fl_notify)(struct file_lock *); /* unblock callback */ + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); + void (*fl_break)(struct file_lock *); /* break_lease callback */ locking rules: - BKL may block -fl_notify: yes no -fl_insert: yes no -fl_remove: yes no - Currently only NLM provides instances of this class. None of the + BKL may block +fl_compare_owner: yes no +fl_notify: yes no +fl_copy_lock: yes no +fl_release_private: yes yes +fl_break: yes no + + Currently only NFSD and NLM provide instances of this class. None of the them block. If you have out-of-tree instances - please, show up. Locking in that area will change. - --------------------------- buffer_head ----------------------------------- prototypes: void (*b_end_io)(struct buffer_head *bh, int uptodate); @@ -284,8 +317,8 @@ prototypes: locking rules: called from interrupts. In other words, extreme care is needed here. bh is locked, but that's all warranties we have here. Currently only RAID1, -highmem and fs/buffer.c are providing these. Block devices call this method -upon the IO completion. +highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices +call this method upon the IO completion. --------------------------- block_device_operations ----------------------- prototypes: @@ -317,6 +350,8 @@ prototypes: unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); + long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *); @@ -335,6 +370,8 @@ prototypes: loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + int (*check_flags)(int); + int (*dir_notify)(struct file *, unsigned long); }; locking rules: @@ -348,6 +385,8 @@ aio_write: no readdir: no poll: no ioctl: yes (see below) +unlocked_ioctl: no (see below) +compat_ioctl: no mmap: no open: maybe (see below) flush: no @@ -361,6 +400,8 @@ writev: no sendfile: no sendpage: no get_unmapped_area: no +check_flags: no +dir_notify: no ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -391,6 +432,9 @@ move ->readdir() to inode_operations and use a separate method for directory anything that resembles union-mount we won't have a struct file for all components. And there are other reasons why the current interface is a mess... +->ioctl() on regular files is superceded by the ->unlocked_ioctl() that +doesn't take the BKL. + ->read on directories probably must go away - we should just enforce -EISDIR in sys_read() and friends.