void (*destroy_inode)(struct inode *);
void (*read_inode) (struct inode *);
void (*dirty_inode) (struct inode *);
- void (*write_inode) (struct inode *, int);
+ int (*write_inode) (struct inode *, int);
void (*put_inode) (struct inode *);
void (*drop_inode) (struct inode *);
void (*delete_inode) (struct inode *);
int (*sync_fs)(struct super_block *sb, int wait);
void (*write_super_lockfs) (struct super_block *);
void (*unlockfs) (struct super_block *);
- int (*statfs) (struct super_block *, struct kstatfs *);
+ int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *);
void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct vfsmount *);
+ ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+ ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
locking rules:
All may block.
write_super_lockfs: ?
unlockfs: ?
statfs: no no no
-remount_fs: no yes maybe (see below)
+remount_fs: yes yes maybe (see below)
clear_inode: no
umount_begin: yes no no
show_options: no (vfsmount->sem)
+quota_read: no no no (see below)
+quota_write: no no no (see below)
->read_inode() is not a method - it's a callback used in iget().
->remount_fs() will have the s_umount lock if it's already mounted.
When called from get_sb_single, it does NOT have the s_umount lock.
+->quota_read() and ->quota_write() functions are both guaranteed to
+be the only ones operating on the quota file by the quota code (via
+dqio_sem) (unless an admin really wants to screw up something and
+writes to quota files with quotas on). For other details about locking
+see also dquot_operations section.
--------------------------- file_system_type ---------------------------
prototypes:
- struct super_block *(*get_sb) (struct file_system_type *, int,
- const char *, void *);
+ int (*get_sb) (struct file_system_type *, int,
+ const char *, void *, struct vfsmount *);
void (*kill_sb) (struct super_block *);
locking rules:
may block BKL
get_sb yes yes
kill_sb yes yes
-->get_sb() returns error or a locked superblock (exclusive on ->s_umount).
+->get_sb() returns error or 0 with locked superblock attached to the vfsmount
+(exclusive on ->s_umount).
->kill_sb() takes a write-locked superblock, does all shutdown work on it,
unlocks and drops the reference.
int (*releasepage) (struct page *, int);
int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
+ int (*launder_page) (struct page *);
locking rules:
All except set_page_dirty may block
invalidatepage: no yes
releasepage: no yes
direct_IO: no
+launder_page: no yes
->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
may be called from the request handler (/dev/loop).
If the filesytem is called for sync then it must wait on any
in-progress I/O and then start new I/O.
-The filesystem should unlock the page synchronously, before returning
-to the caller.
+The filesystem should unlock the page synchronously, before returning to the
+caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
+value. WRITEPAGE_ACTIVATE means that page cannot really be written out
+currently, and VM should stop calling ->writepage() on this page for some
+time. VM does this by moving page to the head of the active list, hence the
+name.
Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
and return zero, writepage *must* run set_page_writeback() against the page,
indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
the kernel assumes that the fs has no private interest in the buffers.
+ ->launder_page() may be called prior to releasing a page if
+it is still found to be dirty. It returns zero if the page was successfully
+cleaned, or an error value if not. Note that in order to prevent the page
+getting mapped back in and redirtied, it needs to be kept locked
+across the entire operation.
+
Note: currently almost all instances of address_space methods are
using BKL for internal serialization and that's one of the worst sources
of contention. Normally they are calling library functions (in fs/buffer.c)
internal fs locking and real critical areas are much smaller than the areas
filesystems protect now.
---------------------------- file_lock ------------------------------------
+----------------------- file_lock_operations ------------------------------
prototypes:
- void (*fl_notify)(struct file_lock *); /* unblock callback */
void (*fl_insert)(struct file_lock *); /* lock insertion callback */
void (*fl_remove)(struct file_lock *); /* lock removal callback */
+ void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+ void (*fl_release_private)(struct file_lock *);
+
+
+locking rules:
+ BKL may block
+fl_insert: yes no
+fl_remove: yes no
+fl_copy_lock: yes no
+fl_release_private: yes yes
+
+----------------------- lock_manager_operations ---------------------------
+prototypes:
+ int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
+ void (*fl_notify)(struct file_lock *); /* unblock callback */
+ void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+ void (*fl_release_private)(struct file_lock *);
+ void (*fl_break)(struct file_lock *); /* break_lease callback */
locking rules:
- BKL may block
-fl_notify: yes no
-fl_insert: yes no
-fl_remove: yes no
- Currently only NLM provides instances of this class. None of the
+ BKL may block
+fl_compare_owner: yes no
+fl_notify: yes no
+fl_copy_lock: yes no
+fl_release_private: yes yes
+fl_break: yes no
+
+ Currently only NFSD and NLM provide instances of this class. None of the
them block. If you have out-of-tree instances - please, show up. Locking
in that area will change.
-
--------------------------- buffer_head -----------------------------------
prototypes:
void (*b_end_io)(struct buffer_head *bh, int uptodate);
locking rules:
called from interrupts. In other words, extreme care is needed here.
bh is locked, but that's all warranties we have here. Currently only RAID1,
-highmem and fs/buffer.c are providing these. Block devices call this method
-upon the IO completion.
+highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
+call this method upon the IO completion.
--------------------------- block_device_operations -----------------------
prototypes:
prototypes:
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t,
- loff_t);
+ ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int,
unsigned long);
+ long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+ long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
readdir: no
poll: no
ioctl: yes (see below)
+unlocked_ioctl: no (see below)
+compat_ioctl: no
mmap: no
open: maybe (see below)
flush: no
anything that resembles union-mount we won't have a struct file for all
components. And there are other reasons why the current interface is a mess...
+->ioctl() on regular files is superceded by the ->unlocked_ioctl() that
+doesn't take the BKL.
+
->read on directories probably must go away - we should just enforce -EISDIR
in sys_read() and friends.
--------------------------- dquot_operations -------------------------------
prototypes:
- void (*initialize) (struct inode *, short);
- void (*drop) (struct inode *);
- int (*alloc_block) (const struct inode *, unsigned long, char);
+ int (*initialize) (struct inode *, int);
+ int (*drop) (struct inode *);
+ int (*alloc_space) (struct inode *, qsize_t, int);
int (*alloc_inode) (const struct inode *, unsigned long);
- void (*free_block) (const struct inode *, unsigned long);
- void (*free_inode) (const struct inode *, unsigned long);
- int (*transfer) (struct dentry *, struct iattr *);
-
-locking rules:
- BKL
-initialize: no
-drop: no
-alloc_block: yes
-alloc_inode: yes
-free_block: yes
-free_inode: yes
-transfer: no
+ int (*free_space) (struct inode *, qsize_t);
+ int (*free_inode) (const struct inode *, unsigned long);
+ int (*transfer) (struct inode *, struct iattr *);
+ int (*write_dquot) (struct dquot *);
+ int (*acquire_dquot) (struct dquot *);
+ int (*release_dquot) (struct dquot *);
+ int (*mark_dirty) (struct dquot *);
+ int (*write_info) (struct super_block *, int);
+
+These operations are intended to be more or less wrapping functions that ensure
+a proper locking wrt the filesystem and call the generic quota operations.
+
+What filesystem should expect from the generic quota functions:
+
+ FS recursion Held locks when called
+initialize: yes maybe dqonoff_sem
+drop: yes -
+alloc_space: ->mark_dirty() -
+alloc_inode: ->mark_dirty() -
+free_space: ->mark_dirty() -
+free_inode: ->mark_dirty() -
+transfer: yes -
+write_dquot: yes dqonoff_sem or dqptr_sem
+acquire_dquot: yes dqonoff_sem or dqptr_sem
+release_dquot: yes dqonoff_sem or dqptr_sem
+mark_dirty: no -
+write_info: yes dqonoff_sem
+
+FS recursion means calling ->quota_read() and ->quota_write() from superblock
+operations.
+
+->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called
+only directly by the filesystem and do not call any fs functions only
+the ->mark_dirty() operation.
+
+More details about quota locking can be found in fs/dquot.c.
--------------------------- vm_operations_struct -----------------------------
prototypes: