X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Focfs2%2Fdlmglue.c;h=5ca42a0d8fec90a5691f1a52e9bcc331e05b7233;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=e971ec2f8407fcb0e0c4c976153603d7a1466b95;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e971ec2f8..5ca42a0d8 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -46,8 +46,10 @@ #include "ocfs2.h" #include "alloc.h" +#include "dcache.h" #include "dlmglue.h" #include "extent_map.h" +#include "file.h" #include "heartbeat.h" #include "inode.h" #include "journal.h" @@ -66,78 +68,161 @@ struct ocfs2_mask_waiter { unsigned long mw_goal; }; -static void ocfs2_inode_ast_func(void *opaque); -static void ocfs2_inode_bast_func(void *opaque, - int level); -static void ocfs2_super_ast_func(void *opaque); -static void ocfs2_super_bast_func(void *opaque, - int level); -static void ocfs2_rename_ast_func(void *opaque); -static void ocfs2_rename_bast_func(void *opaque, - int level); - -/* so far, all locks have gotten along with the same unlock ast */ -static void ocfs2_unlock_ast_func(void *opaque, - enum dlm_status status); -static int ocfs2_do_unblock_meta(struct inode *inode, - int *requeue); -static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, - int *requeue); -static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, - int *requeue); -static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, - int *requeue); -static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, - int *requeue); -typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int); -static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int *requeue, - ocfs2_convert_worker_t *worker); +static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); +static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); +/* + * Return value from ->downconvert_worker functions. + * + * These control the precise actions of ocfs2_unblock_lock() + * and ocfs2_process_blocked_lock() + * + */ +enum ocfs2_unblock_action { + UNBLOCK_CONTINUE = 0, /* Continue downconvert */ + UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire + * ->post_unlock callback */ + UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire + * ->post_unlock() callback. */ +}; + +struct ocfs2_unblock_ctl { + int requeue; + enum ocfs2_unblock_action unblock_action; +}; + +static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, + int new_level); +static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); + +static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, + int blocking); + +static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, + int blocking); + +static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres); + +/* + * OCFS2 Lock Resource Operations + * + * These fine tune the behavior of the generic dlmglue locking infrastructure. + * + * The most basic of lock types can point ->l_priv to their respective + * struct ocfs2_super and allow the default actions to manage things. + * + * Right now, each lock type also needs to implement an init function, + * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() + * should be called when the lock is no longer needed (i.e., object + * destruction time). + */ struct ocfs2_lock_res_ops { - void (*ast)(void *); - void (*bast)(void *, int); - void (*unlock_ast)(void *, enum dlm_status); - int (*unblock)(struct ocfs2_lock_res *, int *); + /* + * Translate an ocfs2_lock_res * into an ocfs2_super *. Define + * this callback if ->l_priv is not an ocfs2_super pointer + */ + struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); + + /* + * Optionally called in the downconvert (or "vote") thread + * after a successful downconvert. The lockres will not be + * referenced after this callback is called, so it is safe to + * free memory, etc. + * + * The exact semantics of when this is called are controlled + * by ->downconvert_worker() + */ + void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); + + /* + * Allow a lock type to add checks to determine whether it is + * safe to downconvert a lock. Return 0 to re-queue the + * downconvert at a later time, nonzero to continue. + * + * For most locks, the default checks that there are no + * incompatible holders are sufficient. + * + * Called with the lockres spinlock held. + */ + int (*check_downconvert)(struct ocfs2_lock_res *, int); + + /* + * Allows a lock type to populate the lock value block. This + * is called on downconvert, and when we drop a lock. + * + * Locks that want to use this should set LOCK_TYPE_USES_LVB + * in the flags field. + * + * Called with the lockres spinlock held. + */ + void (*set_lvb)(struct ocfs2_lock_res *); + + /* + * Called from the downconvert thread when it is determined + * that a lock will be downconverted. This is called without + * any locks held so the function can do work that might + * schedule (syncing out data, etc). + * + * This should return any one of the ocfs2_unblock_action + * values, depending on what it wants the thread to do. + */ + int (*downconvert_worker)(struct ocfs2_lock_res *, int); + + /* + * LOCK_TYPE_* flags which describe the specific requirements + * of a lock type. Descriptions of each individual flag follow. + */ + int flags; }; +/* + * Some locks want to "refresh" potentially stale data when a + * meaningful (PRMODE or EXMODE) lock level is first obtained. If this + * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the + * individual lockres l_flags member from the ast function. It is + * expected that the locking wrapper will clear the + * OCFS2_LOCK_NEEDS_REFRESH flag when done. + */ +#define LOCK_TYPE_REQUIRES_REFRESH 0x1 + +/* + * Indicate that a lock type makes use of the lock value block. The + * ->set_lvb lock type callback must be defined. + */ +#define LOCK_TYPE_USES_LVB 0x2 + static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { - .ast = ocfs2_inode_ast_func, - .bast = ocfs2_inode_bast_func, - .unlock_ast = ocfs2_unlock_ast_func, - .unblock = ocfs2_unblock_inode_lock, + .get_osb = ocfs2_get_inode_osb, + .flags = 0, }; static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { - .ast = ocfs2_inode_ast_func, - .bast = ocfs2_inode_bast_func, - .unlock_ast = ocfs2_unlock_ast_func, - .unblock = ocfs2_unblock_meta, + .get_osb = ocfs2_get_inode_osb, + .check_downconvert = ocfs2_check_meta_downconvert, + .set_lvb = ocfs2_set_meta_lvb, + .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, - int blocking); - static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { - .ast = ocfs2_inode_ast_func, - .bast = ocfs2_inode_bast_func, - .unlock_ast = ocfs2_unlock_ast_func, - .unblock = ocfs2_unblock_data, + .get_osb = ocfs2_get_inode_osb, + .downconvert_worker = ocfs2_data_convert_worker, + .flags = 0, }; static struct ocfs2_lock_res_ops ocfs2_super_lops = { - .ast = ocfs2_super_ast_func, - .bast = ocfs2_super_bast_func, - .unlock_ast = ocfs2_unlock_ast_func, - .unblock = ocfs2_unblock_osb_lock, + .flags = LOCK_TYPE_REQUIRES_REFRESH, }; static struct ocfs2_lock_res_ops ocfs2_rename_lops = { - .ast = ocfs2_rename_ast_func, - .bast = ocfs2_rename_bast_func, - .unlock_ast = ocfs2_unlock_ast_func, - .unblock = ocfs2_unblock_osb_lock, + .flags = 0, +}; + +static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { + .get_osb = ocfs2_get_dentry_osb, + .post_unlock = ocfs2_dentry_post_unlock, + .downconvert_worker = ocfs2_dentry_convert_worker, + .flags = 0, }; static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) @@ -147,29 +232,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) lockres->l_type == OCFS2_LOCK_TYPE_RW; } -static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) +static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) { - return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; -} + BUG_ON(!ocfs2_is_inode_lock(lockres)); -static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) -{ - return lockres->l_type == OCFS2_LOCK_TYPE_RENAME; + return (struct inode *) lockres->l_priv; } -static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) +static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) { - BUG_ON(!ocfs2_is_super_lock(lockres) - && !ocfs2_is_rename_lock(lockres)); + BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); - return (struct ocfs2_super *) lockres->l_priv; + return (struct ocfs2_dentry_lock *)lockres->l_priv; } -static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) +static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) { - BUG_ON(!ocfs2_is_inode_lock(lockres)); + if (lockres->l_ops->get_osb) + return lockres->l_ops->get_osb(lockres); - return (struct inode *) lockres->l_priv; + return (struct ocfs2_super *)lockres->l_priv; } static int ocfs2_lock_create(struct ocfs2_super *osb, @@ -200,25 +282,6 @@ static int ocfs2_meta_lock_update(struct inode *inode, struct buffer_head **bh); static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); static inline int ocfs2_highest_compat_lock_level(int level); -static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, - struct ocfs2_lock_res *lockres, - int new_level); - -static char *ocfs2_lock_type_strings[] = { - [OCFS2_LOCK_TYPE_META] = "Meta", - [OCFS2_LOCK_TYPE_DATA] = "Data", - [OCFS2_LOCK_TYPE_SUPER] = "Super", - [OCFS2_LOCK_TYPE_RENAME] = "Rename", - /* Need to differntiate from [R]ename.. serializing writes is the - * important job it does, anyway. */ - [OCFS2_LOCK_TYPE_RW] = "Write/Read", -}; - -static char *ocfs2_lock_type_string(enum ocfs2_lock_type type) -{ - mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); - return ocfs2_lock_type_strings[type]; -} static void ocfs2_build_lock_name(enum ocfs2_lock_type type, u64 blkno, @@ -231,9 +294,9 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type, BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); - len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016"MLFx64"%08x", - ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, blkno, - generation); + len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", + ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, + (long long)blkno, generation); BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); @@ -242,7 +305,7 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type, mlog_exit_void(); } -static spinlock_t ocfs2_dlm_tracking_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, struct ocfs2_dlm_debug *dlm_debug) @@ -265,13 +328,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, - u64 blkno, - u32 generation, struct ocfs2_lock_res_ops *ops, void *priv) { - ocfs2_build_lock_name(type, blkno, generation, res->l_name); - res->l_type = type; res->l_ops = ops; res->l_priv = priv; @@ -299,6 +358,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, enum ocfs2_lock_type type, + unsigned int generation, struct inode *inode) { struct ocfs2_lock_res_ops *ops; @@ -319,9 +379,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, break; }; - ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, - OCFS2_I(inode)->ip_blkno, - inode->i_generation, ops, inode); + ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, + generation, res->l_name); + ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); +} + +static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) +{ + struct inode *inode = ocfs2_lock_res_inode(lockres); + + return OCFS2_SB(inode->i_sb); +} + +static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) +{ + __be64 inode_blkno_be; + + memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], + sizeof(__be64)); + + return be64_to_cpu(inode_blkno_be); +} + +static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_dentry_lock *dl = lockres->l_priv; + + return OCFS2_SB(dl->dl_inode->i_sb); +} + +void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, + u64 parent, struct inode *inode) +{ + int len; + u64 inode_blkno = OCFS2_I(inode)->ip_blkno; + __be64 inode_blkno_be = cpu_to_be64(inode_blkno); + struct ocfs2_lock_res *lockres = &dl->dl_lockres; + + ocfs2_lock_res_init_once(lockres); + + /* + * Unfortunately, the standard lock naming scheme won't work + * here because we have two 16 byte values to use. Instead, + * we'll stuff the inode number as a binary value. We still + * want error prints to show something without garbling the + * display, so drop a null byte in there before the inode + * number. A future version of OCFS2 will likely use all + * binary lock names. The stringified names have been a + * tremendous aid in debugging, but now that the debugfs + * interface exists, we can mangle things there if need be. + * + * NOTE: We also drop the standard "pad" value (the total lock + * name size stays the same though - the last part is all + * zeros due to the memset in ocfs2_lock_res_init_once() + */ + len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, + "%c%016llx", + ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), + (long long)parent); + + BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); + + memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, + sizeof(__be64)); + + ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, + OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, + dl); } static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, @@ -330,8 +454,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, /* Superblock lockres doesn't come from a slab so we call init * once on it manually. */ ocfs2_lock_res_init_once(res); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, + 0, res->l_name); ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, - OCFS2_SUPER_BLOCK_BLKNO, 0, &ocfs2_super_lops, osb); } @@ -341,7 +466,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, /* Rename lockres doesn't come from a slab so we call init * once on it manually. */ ocfs2_lock_res_init_once(res); - ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); + ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, &ocfs2_rename_lops, osb); } @@ -495,7 +621,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo * information is already up to data. Convert from NL to * *anything* however should mark ourselves as needing an * update */ - if (lockres->l_level == LKM_NLMODE) + if (lockres->l_level == LKM_NLMODE && + lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); lockres->l_level = lockres->l_requested; @@ -512,7 +639,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); if (lockres->l_requested > LKM_NLMODE && - !(lockres->l_flags & OCFS2_LOCK_LOCAL)) + !(lockres->l_flags & OCFS2_LOCK_LOCAL) && + lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); lockres->l_level = lockres->l_requested; @@ -522,68 +650,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc mlog_exit_void(); } -static void ocfs2_inode_ast_func(void *opaque) -{ - struct ocfs2_lock_res *lockres = opaque; - struct inode *inode; - struct dlm_lockstatus *lksb; - unsigned long flags; - - mlog_entry_void(); - - inode = ocfs2_lock_res_inode(lockres); - - mlog(0, "AST fired for inode %"MLFu64", l_action = %u, type = %s\n", - OCFS2_I(inode)->ip_blkno, lockres->l_action, - ocfs2_lock_type_string(lockres->l_type)); - - BUG_ON(!ocfs2_is_inode_lock(lockres)); - - spin_lock_irqsave(&lockres->l_lock, flags); - - lksb = &(lockres->l_lksb); - if (lksb->status != DLM_NORMAL) { - mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u " - "on inode %"MLFu64"\n", lksb->status, - OCFS2_I(inode)->ip_blkno); - spin_unlock_irqrestore(&lockres->l_lock, flags); - mlog_exit_void(); - return; - } - - switch(lockres->l_action) { - case OCFS2_AST_ATTACH: - ocfs2_generic_handle_attach_action(lockres); - lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); - break; - case OCFS2_AST_CONVERT: - ocfs2_generic_handle_convert_action(lockres); - break; - case OCFS2_AST_DOWNCONVERT: - ocfs2_generic_handle_downconvert_action(lockres); - break; - default: - mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " - "lockres flags = 0x%lx, unlock action: %u\n", - lockres->l_name, lockres->l_action, lockres->l_flags, - lockres->l_unlock_action); - - BUG(); - } - - /* data and rw locking ignores refresh flag for now. */ - if (lockres->l_type != OCFS2_LOCK_TYPE_META) - lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); - - /* set it to something invalid so if we get called again we - * can catch it. */ - lockres->l_action = OCFS2_AST_INVALID; - spin_unlock_irqrestore(&lockres->l_lock, flags); - wake_up(&lockres->l_event); - - mlog_exit_void(); -} - static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level) { @@ -610,55 +676,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, return needs_downconvert; } -static void ocfs2_generic_bast_func(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level) +static void ocfs2_blocking_ast(void *opaque, int level) { + struct ocfs2_lock_res *lockres = opaque; + struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); int needs_downconvert; unsigned long flags; - mlog_entry_void(); - BUG_ON(level <= LKM_NLMODE); + mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", + lockres->l_name, level, lockres->l_level, + ocfs2_lock_type_string(lockres->l_type)); + spin_lock_irqsave(&lockres->l_lock, flags); needs_downconvert = ocfs2_generic_handle_bast(lockres, level); if (needs_downconvert) ocfs2_schedule_blocked_lock(osb, lockres); spin_unlock_irqrestore(&lockres->l_lock, flags); - ocfs2_kick_vote_thread(osb); - wake_up(&lockres->l_event); - mlog_exit_void(); -} -static void ocfs2_inode_bast_func(void *opaque, int level) -{ - struct ocfs2_lock_res *lockres = opaque; - struct inode *inode; - struct ocfs2_super *osb; - - mlog_entry_void(); - - BUG_ON(!ocfs2_is_inode_lock(lockres)); - - inode = ocfs2_lock_res_inode(lockres); - osb = OCFS2_SB(inode->i_sb); - - mlog(0, "BAST fired for inode %"MLFu64", blocking = %d, level = %d " - "type = %s\n", OCFS2_I(inode)->ip_blkno, level, - lockres->l_level, - ocfs2_lock_type_string(lockres->l_type)); - - ocfs2_generic_bast_func(osb, lockres, level); - - mlog_exit_void(); + ocfs2_kick_vote_thread(osb); } -static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, - int ignore_refresh) +static void ocfs2_locking_ast(void *opaque) { + struct ocfs2_lock_res *lockres = opaque; struct dlm_lockstatus *lksb = &lockres->l_lksb; unsigned long flags; @@ -674,6 +718,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, switch(lockres->l_action) { case OCFS2_AST_ATTACH: ocfs2_generic_handle_attach_action(lockres); + lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); break; case OCFS2_AST_CONVERT: ocfs2_generic_handle_convert_action(lockres); @@ -682,80 +727,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, ocfs2_generic_handle_downconvert_action(lockres); break; default: + mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " + "lockres flags = 0x%lx, unlock action: %u\n", + lockres->l_name, lockres->l_action, lockres->l_flags, + lockres->l_unlock_action); BUG(); } - if (ignore_refresh) - lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); - /* set it to something invalid so if we get called again we * can catch it. */ lockres->l_action = OCFS2_AST_INVALID; - spin_unlock_irqrestore(&lockres->l_lock, flags); wake_up(&lockres->l_event); -} - -static void ocfs2_super_ast_func(void *opaque) -{ - struct ocfs2_lock_res *lockres = opaque; - - mlog_entry_void(); - mlog(0, "Superblock AST fired\n"); - - BUG_ON(!ocfs2_is_super_lock(lockres)); - ocfs2_generic_ast_func(lockres, 0); - - mlog_exit_void(); -} - -static void ocfs2_super_bast_func(void *opaque, - int level) -{ - struct ocfs2_lock_res *lockres = opaque; - struct ocfs2_super *osb; - - mlog_entry_void(); - mlog(0, "Superblock BAST fired\n"); - - BUG_ON(!ocfs2_is_super_lock(lockres)); - osb = ocfs2_lock_res_super(lockres); - ocfs2_generic_bast_func(osb, lockres, level); - - mlog_exit_void(); -} - -static void ocfs2_rename_ast_func(void *opaque) -{ - struct ocfs2_lock_res *lockres = opaque; - - mlog_entry_void(); - - mlog(0, "Rename AST fired\n"); - - BUG_ON(!ocfs2_is_rename_lock(lockres)); - - ocfs2_generic_ast_func(lockres, 1); - - mlog_exit_void(); -} - -static void ocfs2_rename_bast_func(void *opaque, - int level) -{ - struct ocfs2_lock_res *lockres = opaque; - struct ocfs2_super *osb; - - mlog_entry_void(); - - mlog(0, "Rename BAST fired\n"); - - BUG_ON(!ocfs2_is_rename_lock(lockres)); - - osb = ocfs2_lock_res_super(lockres); - ocfs2_generic_bast_func(osb, lockres, level); - - mlog_exit_void(); + spin_unlock_irqrestore(&lockres->l_lock, flags); } static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, @@ -786,7 +770,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, int dlm_flags) { int ret = 0; - enum dlm_status status; + enum dlm_status status = DLM_NORMAL; unsigned long flags; mlog_entry_void(); @@ -811,9 +795,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, &lockres->l_lksb, dlm_flags, lockres->l_name, - lockres->l_ops->ast, + OCFS2_LOCK_ID_MAX_LEN - 1, + ocfs2_locking_ast, lockres, - lockres->l_ops->bast); + ocfs2_blocking_ast); if (status != DLM_NORMAL) { ocfs2_log_dlm_error("dlmlock", status, lockres); ret = -EINVAL; @@ -931,6 +916,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb, ocfs2_init_mask_waiter(&mw); + if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lkm_flags |= LKM_VALBLK; + again: wait = 0; @@ -998,11 +986,12 @@ again: status = dlmlock(osb->dlm, level, &lockres->l_lksb, - lkm_flags|LKM_CONVERT|LKM_VALBLK, + lkm_flags|LKM_CONVERT, lockres->l_name, - lockres->l_ops->ast, + OCFS2_LOCK_ID_MAX_LEN - 1, + ocfs2_locking_ast, lockres, - lockres->l_ops->bast); + ocfs2_blocking_ast); if (status != DLM_NORMAL) { if ((lkm_flags & LKM_NOQUEUE) && (status == DLM_NOTQUEUED)) @@ -1075,18 +1064,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, mlog_exit_void(); } -static int ocfs2_create_new_inode_lock(struct inode *inode, - struct ocfs2_lock_res *lockres) +static int ocfs2_create_new_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int ex, + int local) { - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int level = ex ? LKM_EXMODE : LKM_PRMODE; unsigned long flags; + int lkm_flags = local ? LKM_LOCAL : 0; spin_lock_irqsave(&lockres->l_lock, flags); BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); spin_unlock_irqrestore(&lockres->l_lock, flags); - return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); + return ocfs2_lock_create(osb, lockres, level, lkm_flags); } /* Grants us an EX lock on the data and metadata resources, skipping @@ -1098,13 +1090,14 @@ static int ocfs2_create_new_inode_lock(struct inode *inode, int ocfs2_create_new_inode_locks(struct inode *inode) { int ret; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!inode); BUG_ON(!ocfs2_inode_is_new(inode)); mlog_entry_void(); - mlog(0, "Inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); /* NOTE: That we don't increment any of the holder counts, nor * do we add anything to a journal handle. Since this is @@ -1114,22 +1107,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode) * on a resource which has an invalid one -- we'll set it * valid when we release the EX. */ - ret = ocfs2_create_new_inode_lock(inode, - &OCFS2_I(inode)->ip_rw_lockres); + ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); if (ret) { mlog_errno(ret); goto bail; } - ret = ocfs2_create_new_inode_lock(inode, - &OCFS2_I(inode)->ip_meta_lockres); + /* + * We don't want to use LKM_LOCAL on a meta data lock as they + * don't use a generation in their lock names. + */ + ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); if (ret) { mlog_errno(ret); goto bail; } - ret = ocfs2_create_new_inode_lock(inode, - &OCFS2_I(inode)->ip_data_lockres); + ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); if (ret) { mlog_errno(ret); goto bail; @@ -1144,15 +1138,19 @@ int ocfs2_rw_lock(struct inode *inode, int write) { int status, level; struct ocfs2_lock_res *lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!inode); mlog_entry_void(); - mlog(0, "inode %"MLFu64" take %s RW lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu take %s RW lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); + if (ocfs2_mount_local(osb)) + return 0; + lockres = &OCFS2_I(inode)->ip_rw_lockres; level = write ? LKM_EXMODE : LKM_PRMODE; @@ -1170,14 +1168,16 @@ void ocfs2_rw_unlock(struct inode *inode, int write) { int level = write ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s RW lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s RW lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); - ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); } @@ -1188,13 +1188,14 @@ int ocfs2_data_lock_full(struct inode *inode, { int status = 0, level; struct ocfs2_lock_res *lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!inode); mlog_entry_void(); - mlog(0, "inode %"MLFu64" take %s DATA lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu take %s DATA lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); /* We'll allow faking a readonly data lock for @@ -1207,6 +1208,9 @@ int ocfs2_data_lock_full(struct inode *inode, goto out; } + if (ocfs2_mount_local(osb)) + goto out; + lockres = &OCFS2_I(inode)->ip_data_lockres; level = write ? LKM_EXMODE : LKM_PRMODE; @@ -1275,14 +1279,16 @@ void ocfs2_data_unlock(struct inode *inode, { int level = write ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s DATA lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s DATA lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && + !ocfs2_mount_local(osb)) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); @@ -1318,11 +1324,22 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; - lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); + /* + * Invalidate the LVB of a deleted inode - this way other + * nodes are forced to go to disk and discover the new inode + * status. + */ + if (oi->ip_flags & OCFS2_INODE_DELETED) { + lvb->lvb_version = 0; + goto out; + } + + lvb->lvb_version = OCFS2_LVB_VERSION; lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); lvb->lvb_iuid = cpu_to_be32(inode->i_uid); lvb->lvb_igid = cpu_to_be32(inode->i_gid); + lvb->lvb_itag = cpu_to_be16(inode->i_tag); lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); lvb->lvb_iatime_packed = @@ -1331,7 +1348,10 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); + lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); + lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); +out: mlog_meta_lvb(0, lockres); mlog_exit_void(); @@ -1361,6 +1381,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); + oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); + ocfs2_set_inode_flags(inode); + /* fast-symlinks are a special case */ if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) inode->i_blocks = 0; @@ -1370,6 +1393,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) inode->i_uid = be32_to_cpu(lvb->lvb_iuid); inode->i_gid = be32_to_cpu(lvb->lvb_igid); + inode->i_tag = be16_to_cpu(lvb->lvb_itag); inode->i_mode = be16_to_cpu(lvb->lvb_imode); inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); ocfs2_unpack_timespec(&inode->i_atime, @@ -1383,11 +1407,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) mlog_exit_void(); } -static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) +static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, + struct ocfs2_lock_res *lockres) { struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; - if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) + if (lvb->lvb_version == OCFS2_LVB_VERSION + && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) return 1; return 0; } @@ -1455,26 +1481,29 @@ static int ocfs2_meta_lock_update(struct inode *inode, { int status = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_lock_res *lockres; + struct ocfs2_lock_res *lockres = NULL; struct ocfs2_dinode *fe; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); spin_lock(&oi->ip_lock); if (oi->ip_flags & OCFS2_INODE_DELETED) { - mlog(0, "Orphaned inode %"MLFu64" was deleted while we " + mlog(0, "Orphaned inode %llu was deleted while we " "were waiting on a lock. ip_flags = 0x%x\n", - oi->ip_blkno, oi->ip_flags); + (unsigned long long)oi->ip_blkno, oi->ip_flags); spin_unlock(&oi->ip_lock); status = -ENOENT; goto bail; } spin_unlock(&oi->ip_lock); - lockres = &oi->ip_meta_lockres; + if (!ocfs2_mount_local(osb)) { + lockres = &oi->ip_meta_lockres; - if (!ocfs2_should_refresh_lock_res(lockres)) - goto bail; + if (!ocfs2_should_refresh_lock_res(lockres)) + goto bail; + } /* This will discard any caching information we might have had * for the inode metadata. */ @@ -1484,9 +1513,9 @@ static int ocfs2_meta_lock_update(struct inode *inode, * map (directories, bitmap files, etc) */ ocfs2_extent_map_trunc(inode, 0); - if (ocfs2_meta_lvb_is_trustable(lockres)) { - mlog(0, "Trusting LVB on inode %"MLFu64"\n", - oi->ip_blkno); + if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) { + mlog(0, "Trusting LVB on inode %llu\n", + (unsigned long long)oi->ip_blkno); ocfs2_refresh_inode_from_lvb(inode); } else { /* Boo, we have to go to disk. */ @@ -1514,15 +1543,16 @@ static int ocfs2_meta_lock_update(struct inode *inode, } mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation), - "Invalid dinode %"MLFu64" disk generation: %u " + "Invalid dinode %llu disk generation: %u " "inode->i_generation: %u\n", - oi->ip_blkno, le32_to_cpu(fe->i_generation), + (unsigned long long)oi->ip_blkno, + le32_to_cpu(fe->i_generation), inode->i_generation); mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), - "Stale dinode %"MLFu64" dtime: %"MLFu64" " - "flags: 0x%x\n", oi->ip_blkno, - le64_to_cpu(fe->i_dtime), + "Stale dinode %llu dtime: %llu flags: 0x%x\n", + (unsigned long long)oi->ip_blkno, + (unsigned long long)le64_to_cpu(fe->i_dtime), le32_to_cpu(fe->i_flags)); ocfs2_refresh_inode(inode, fe); @@ -1530,7 +1560,8 @@ static int ocfs2_meta_lock_update(struct inode *inode, status = 0; bail_refresh: - ocfs2_complete_lock_res_refresh(lockres, status); + if (lockres) + ocfs2_complete_lock_res_refresh(lockres, status); bail: mlog_exit(status); return status; @@ -1567,13 +1598,12 @@ static int ocfs2_assign_bh(struct inode *inode, * the result of the lock will be communicated via the callback. */ int ocfs2_meta_lock_full(struct inode *inode, - struct ocfs2_journal_handle *handle, struct buffer_head **ret_bh, int ex, int arg_flags) { int status, level, dlm_flags, acquired; - struct ocfs2_lock_res *lockres; + struct ocfs2_lock_res *lockres = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *local_bh = NULL; @@ -1581,8 +1611,8 @@ int ocfs2_meta_lock_full(struct inode *inode, mlog_entry_void(); - mlog(0, "inode %"MLFu64", take %s META lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu, take %s META lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? "EXMODE" : "PRMODE"); status = 0; @@ -1595,6 +1625,9 @@ int ocfs2_meta_lock_full(struct inode *inode, goto bail; } + if (ocfs2_mount_local(osb)) + goto local; + if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) wait_event(osb->recovery_event, ocfs2_node_map_is_empty(osb, &osb->recovery_map)); @@ -1624,6 +1657,20 @@ int ocfs2_meta_lock_full(struct inode *inode, wait_event(osb->recovery_event, ocfs2_node_map_is_empty(osb, &osb->recovery_map)); +local: + /* + * We only see this flag if we're being called from + * ocfs2_read_locked_inode(). It means we're locking an inode + * which hasn't been populated yet, so clear the refresh flag + * and let the caller handle it. + */ + if (inode->i_state & I_NEW) { + status = 0; + if (lockres) + ocfs2_complete_lock_res_refresh(lockres, 0); + goto bail; + } + /* This is fun. The caller may want a bh back, or it may * not. ocfs2_meta_lock_update definitely wants one in, but * may or may not read one, depending on what's in the @@ -1644,12 +1691,6 @@ int ocfs2_meta_lock_full(struct inode *inode, } } - if (handle) { - status = ocfs2_handle_add_lock(handle, inode); - if (status < 0) - mlog_errno(status); - } - bail: if (status < 0) { if (ret_bh && (*ret_bh)) { @@ -1689,18 +1730,16 @@ bail: * the lock inversion simply. */ int ocfs2_meta_lock_with_page(struct inode *inode, - struct ocfs2_journal_handle *handle, struct buffer_head **ret_bh, int ex, struct page *page) { int ret; - ret = ocfs2_meta_lock_full(inode, handle, ret_bh, ex, - OCFS2_LOCK_NONBLOCK); + ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); if (ret == -EAGAIN) { unlock_page(page); - if (ocfs2_meta_lock(inode, handle, ret_bh, ex) == 0) + if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) ocfs2_meta_unlock(inode, ex); ret = AOP_TRUNCATED_PAGE; } @@ -1708,19 +1747,59 @@ int ocfs2_meta_lock_with_page(struct inode *inode, return ret; } +int ocfs2_meta_lock_atime(struct inode *inode, + struct vfsmount *vfsmnt, + int *level) +{ + int ret; + + mlog_entry_void(); + ret = ocfs2_meta_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + /* + * If we should update atime, we will get EX lock, + * otherwise we just get PR lock. + */ + if (ocfs2_should_update_atime(inode, vfsmnt)) { + struct buffer_head *bh = NULL; + + ocfs2_meta_unlock(inode, 0); + ret = ocfs2_meta_lock(inode, &bh, 1); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + *level = 1; + if (ocfs2_should_update_atime(inode, vfsmnt)) + ocfs2_update_inode_atime(inode, bh); + if (bh) + brelse(bh); + } else + *level = 0; + + mlog_exit(ret); + return ret; +} + void ocfs2_meta_unlock(struct inode *inode, int ex) { int level = ex ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry_void(); - mlog(0, "inode %"MLFu64" drop %s META lock\n", - OCFS2_I(inode)->ip_blkno, + mlog(0, "inode %llu drop %s META lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, ex ? "EXMODE" : "PRMODE"); - if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) + if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && + !ocfs2_mount_local(osb)) ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); mlog_exit_void(); @@ -1729,7 +1808,7 @@ void ocfs2_meta_unlock(struct inode *inode, int ocfs2_super_lock(struct ocfs2_super *osb, int ex) { - int status; + int status = 0; int level = ex ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; struct buffer_head *bh; @@ -1740,6 +1819,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb, if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ocfs2_mount_local(osb)) + goto bail; + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); if (status < 0) { mlog_errno(status); @@ -1778,7 +1860,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, int level = ex ? LKM_EXMODE : LKM_PRMODE; struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; - ocfs2_cluster_unlock(osb, lockres, level); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, level); } int ocfs2_rename_lock(struct ocfs2_super *osb) @@ -1789,6 +1872,9 @@ int ocfs2_rename_lock(struct ocfs2_super *osb) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ocfs2_mount_local(osb)) + return 0; + status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); if (status < 0) mlog_errno(status); @@ -1800,7 +1886,40 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) { struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; - ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); +} + +int ocfs2_dentry_lock(struct dentry *dentry, int ex) +{ + int ret; + int level = ex ? LKM_EXMODE : LKM_PRMODE; + struct ocfs2_dentry_lock *dl = dentry->d_fsdata; + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + + BUG_ON(!dl); + + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + + ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); + if (ret < 0) + mlog_errno(ret); + + return ret; +} + +void ocfs2_dentry_unlock(struct dentry *dentry, int ex) +{ + int level = ex ? LKM_EXMODE : LKM_PRMODE; + struct ocfs2_dentry_lock *dl = dentry->d_fsdata; + struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + + if (!ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); } /* Reference counting of the dlm debug structure. We want this because @@ -1933,9 +2052,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) if (!lockres) return -EINVAL; - seq_printf(m, "0x%x\t" - "%.*s\t" - "%d\t" + seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); + + if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) + seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, + lockres->l_name, + (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); + else + seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); + + seq_printf(m, "%d\t" "0x%lx\t" "0x%x\t" "0x%x\t" @@ -1943,8 +2069,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) "%u\t" "%d\t" "%d\t", - OCFS2_DLM_DEBUG_STR_VERSION, - OCFS2_LOCK_ID_MAX_LEN, lockres->l_name, lockres->l_level, lockres->l_flags, lockres->l_action, @@ -1995,7 +2119,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) mlog_errno(ret); goto out; } - osb = (struct ocfs2_super *) inode->u.generic_ip; + osb = inode->i_private; ocfs2_get_dlm_debug(osb->osb_dlm_debug); priv->p_dlm_debug = osb->osb_dlm_debug; INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); @@ -2017,7 +2141,7 @@ out: return ret; } -static struct file_operations ocfs2_dlm_debug_fops = { +static const struct file_operations ocfs2_dlm_debug_fops = { .open = ocfs2_dlm_debug_open, .release = ocfs2_dlm_debug_release, .read = seq_read, @@ -2058,12 +2182,15 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) int ocfs2_dlm_init(struct ocfs2_super *osb) { - int status; + int status = 0; u32 dlm_key; - struct dlm_ctxt *dlm; + struct dlm_ctxt *dlm = NULL; mlog_entry_void(); + if (ocfs2_mount_local(osb)) + goto local; + status = ocfs2_dlm_init_debug(osb); if (status < 0) { mlog_errno(status); @@ -2071,8 +2198,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) } /* launch vote thread */ - osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote-%d", - osb->osb_id); + osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); if (IS_ERR(osb->vote_task)) { status = PTR_ERR(osb->vote_task); osb->vote_task = NULL; @@ -2092,11 +2218,12 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) goto bail; } + dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); + +local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); - dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); - osb->dlm = dlm; status = 0; @@ -2135,7 +2262,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) mlog_exit_void(); } -static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) +static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) { struct ocfs2_lock_res *lockres = opaque; unsigned long flags; @@ -2191,24 +2318,20 @@ complete_unlock: mlog_exit_void(); } -typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *); - -struct drop_lock_cb { - ocfs2_pre_drop_cb_t *drop_func; - void *drop_data; -}; - static int ocfs2_drop_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - struct drop_lock_cb *dcb) + struct ocfs2_lock_res *lockres) { enum dlm_status status; unsigned long flags; + int lkm_flags = 0; /* We didn't get anywhere near actually using this lockres. */ if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) goto out; + if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lkm_flags |= LKM_VALBLK; + spin_lock_irqsave(&lockres->l_lock, flags); mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), @@ -2231,8 +2354,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, spin_lock_irqsave(&lockres->l_lock, flags); } - if (dcb) - dcb->drop_func(lockres, dcb->drop_data); + if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { + if (lockres->l_flags & OCFS2_LOCK_ATTACHED && + lockres->l_level == LKM_EXMODE && + !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) + lockres->l_ops->set_lvb(lockres); + } if (lockres->l_flags & OCFS2_LOCK_BUSY) mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", @@ -2258,8 +2385,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, mlog(0, "lock %s\n", lockres->l_name); - status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, - lockres->l_ops->unlock_ast, lockres); + status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, + ocfs2_unlock_ast, lockres); if (status != DLM_NORMAL) { ocfs2_log_dlm_error("dlmunlock", status, lockres); mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); @@ -2306,43 +2433,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) spin_unlock_irqrestore(&lockres->l_lock, flags); } -static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) +void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres) { - int status; - - mlog_entry_void(); - - ocfs2_mark_lockres_freeing(&osb->osb_super_lockres); - - status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL); - if (status < 0) - mlog_errno(status); - - ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres); - - status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL); - if (status < 0) - mlog_errno(status); + int ret; - mlog_exit(status); + ocfs2_mark_lockres_freeing(lockres); + ret = ocfs2_drop_lock(osb, lockres); + if (ret) + mlog_errno(ret); } -static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) +static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) { - struct inode *inode = data; - - /* the metadata lock requires a bit more work as we have an - * LVB to worry about. */ - if (lockres->l_flags & OCFS2_LOCK_ATTACHED && - lockres->l_level == LKM_EXMODE && - !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) - __ocfs2_stuff_meta_lvb(inode); + ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); + ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); } int ocfs2_drop_inode_locks(struct inode *inode) { int status, err; - struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, }; mlog_entry_void(); @@ -2350,24 +2460,21 @@ int ocfs2_drop_inode_locks(struct inode *inode) * ocfs2_clear_inode has done it for us. */ err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), - &OCFS2_I(inode)->ip_data_lockres, - NULL); + &OCFS2_I(inode)->ip_data_lockres); if (err < 0) mlog_errno(err); status = err; err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), - &OCFS2_I(inode)->ip_meta_lockres, - &meta_dcb); + &OCFS2_I(inode)->ip_meta_lockres); if (err < 0) mlog_errno(err); if (err < 0 && !status) status = err; err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), - &OCFS2_I(inode)->ip_rw_lockres, - NULL); + &OCFS2_I(inode)->ip_rw_lockres); if (err < 0) mlog_errno(err); if (err < 0 && !status) @@ -2416,9 +2523,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, &lockres->l_lksb, dlm_flags, lockres->l_name, - lockres->l_ops->ast, + OCFS2_LOCK_ID_MAX_LEN - 1, + ocfs2_locking_ast, lockres, - lockres->l_ops->bast); + ocfs2_blocking_ast); if (status != DLM_NORMAL) { ocfs2_log_dlm_error("dlmlock", status, lockres); ret = -EINVAL; @@ -2477,7 +2585,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_CANCEL, - lockres->l_ops->unlock_ast, + ocfs2_unlock_ast, lockres); if (status != DLM_NORMAL) { ocfs2_log_dlm_error("dlmunlock", status, lockres); @@ -2491,115 +2599,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, return ret; } -static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, - struct ocfs2_lock_res *lockres, - int new_level) -{ - int ret; - - mlog_entry_void(); - - BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); - - if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { - ret = 0; - mlog(0, "lockres %s currently being refreshed -- backing " - "off!\n", lockres->l_name); - } else if (new_level == LKM_PRMODE) - ret = !lockres->l_ex_holders && - ocfs2_inode_fully_checkpointed(inode); - else /* Must be NLMODE we're converting to. */ - ret = !lockres->l_ro_holders && !lockres->l_ex_holders && - ocfs2_inode_fully_checkpointed(inode); - - mlog_exit(ret); - return ret; -} - -static int ocfs2_do_unblock_meta(struct inode *inode, - int *requeue) -{ - int new_level; - int set_lvb = 0; - int ret = 0; - struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; - unsigned long flags; - - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - - mlog_entry_void(); - - spin_lock_irqsave(&lockres->l_lock, flags); - - BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); - - mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level, - lockres->l_blocking); - - BUG_ON(lockres->l_level != LKM_EXMODE && - lockres->l_level != LKM_PRMODE); - - if (lockres->l_flags & OCFS2_LOCK_BUSY) { - *requeue = 1; - ret = ocfs2_prepare_cancel_convert(osb, lockres); - spin_unlock_irqrestore(&lockres->l_lock, flags); - if (ret) { - ret = ocfs2_cancel_convert(osb, lockres); - if (ret < 0) - mlog_errno(ret); - } - goto leave; - } - - new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); - - mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n", - lockres->l_level, lockres->l_blocking, new_level); - - if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) { - if (lockres->l_level == LKM_EXMODE) - set_lvb = 1; - - /* If the lock hasn't been refreshed yet (rare), then - * our memory inode values are old and we skip - * stuffing the lvb. There's no need to actually clear - * out the lvb here as it's value is still valid. */ - if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { - if (set_lvb) - __ocfs2_stuff_meta_lvb(inode); - } else - mlog(0, "lockres %s: downconverting stale lock!\n", - lockres->l_name); - - mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, " - "l_blocking=%d, new_level=%d\n", - lockres->l_level, lockres->l_blocking, new_level); - - ocfs2_prepare_downconvert(lockres, new_level); - spin_unlock_irqrestore(&lockres->l_lock, flags); - ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); - goto leave; - } - if (!ocfs2_inode_fully_checkpointed(inode)) - ocfs2_start_checkpoint(osb); - - *requeue = 1; - spin_unlock_irqrestore(&lockres->l_lock, flags); - ret = 0; -leave: - mlog_exit(ret); - return ret; -} - -static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int *requeue, - ocfs2_convert_worker_t *worker) +static int ocfs2_unblock_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + struct ocfs2_unblock_ctl *ctl) { unsigned long flags; int blocking; int new_level; int ret = 0; + int set_lvb = 0; mlog_entry_void(); @@ -2609,7 +2617,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb, recheck: if (lockres->l_flags & OCFS2_LOCK_BUSY) { - *requeue = 1; + ctl->requeue = 1; ret = ocfs2_prepare_cancel_convert(osb, lockres); spin_unlock_irqrestore(&lockres->l_lock, flags); if (ret) { @@ -2623,27 +2631,33 @@ recheck: /* if we're blocking an exclusive and we have *any* holders, * then requeue. */ if ((lockres->l_blocking == LKM_EXMODE) - && (lockres->l_ex_holders || lockres->l_ro_holders)) { - spin_unlock_irqrestore(&lockres->l_lock, flags); - *requeue = 1; - ret = 0; - goto leave; - } + && (lockres->l_ex_holders || lockres->l_ro_holders)) + goto leave_requeue; /* If it's a PR we're blocking, then only * requeue if we've got any EX holders */ if (lockres->l_blocking == LKM_PRMODE && - lockres->l_ex_holders) { - spin_unlock_irqrestore(&lockres->l_lock, flags); - *requeue = 1; - ret = 0; - goto leave; - } + lockres->l_ex_holders) + goto leave_requeue; + + /* + * Can we get a lock in this state if the holder counts are + * zero? The meta data unblock code used to check this. + */ + if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) + && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) + goto leave_requeue; + + new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); + + if (lockres->l_ops->check_downconvert + && !lockres->l_ops->check_downconvert(lockres, new_level)) + goto leave_requeue; /* If we get here, then we know that there are no more * incompatible holders (and anyone asking for an incompatible * lock is blocked). We can now downconvert the lock */ - if (!worker) + if (!lockres->l_ops->downconvert_worker) goto downconvert; /* Some lockres types want to do a bit of work before @@ -2653,7 +2667,10 @@ recheck: blocking = lockres->l_blocking; spin_unlock_irqrestore(&lockres->l_lock, flags); - worker(lockres, blocking); + ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); + + if (ctl->unblock_action == UNBLOCK_STOP_POST) + goto leave; spin_lock_irqsave(&lockres->l_lock, flags); if (blocking != lockres->l_blocking) { @@ -2663,36 +2680,62 @@ recheck: } downconvert: - *requeue = 0; - new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); + ctl->requeue = 0; + + if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { + if (lockres->l_level == LKM_EXMODE) + set_lvb = 1; + + /* + * We only set the lvb if the lock has been fully + * refreshed - otherwise we risk setting stale + * data. Otherwise, there's no need to actually clear + * out the lvb here as it's value is still valid. + */ + if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) + lockres->l_ops->set_lvb(lockres); + } ocfs2_prepare_downconvert(lockres, new_level); spin_unlock_irqrestore(&lockres->l_lock, flags); - ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); + ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); leave: mlog_exit(ret); return ret; + +leave_requeue: + spin_unlock_irqrestore(&lockres->l_lock, flags); + ctl->requeue = 1; + + mlog_exit(0); + return 0; } -static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, - int blocking) +static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, + int blocking) { struct inode *inode; struct address_space *mapping; - mlog_entry_void(); - inode = ocfs2_lock_res_inode(lockres); mapping = inode->i_mapping; + /* + * We need this before the filemap_fdatawrite() so that it can + * transfer the dirty bit from the PTE to the + * page. Unfortunately this means that even for EX->PR + * downconverts, we'll lose our mappings and have to build + * them up again. + */ + unmap_mapping_range(mapping, 0, 0, 0); + if (filemap_fdatawrite(mapping)) { - mlog(ML_ERROR, "Could not sync inode %"MLFu64" for downconvert!", - OCFS2_I(inode)->ip_blkno); + mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", + (unsigned long long)OCFS2_I(inode)->ip_blkno); } sync_mapping_buffers(mapping); if (blocking == LKM_EXMODE) { truncate_inode_pages(mapping, 0); - unmap_mapping_range(mapping, 0, 0, 0); } else { /* We only need to wait on the I/O if we're not also * truncating pages because truncate_inode_pages waits @@ -2702,114 +2745,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, filemap_fdatawait(mapping); } - mlog_exit_void(); + return UNBLOCK_CONTINUE; } -int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, - int *requeue) +static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, + int new_level) { - int status; - struct inode *inode; - struct ocfs2_super *osb; - - mlog_entry_void(); + struct inode *inode = ocfs2_lock_res_inode(lockres); + int checkpointed = ocfs2_inode_fully_checkpointed(inode); - inode = ocfs2_lock_res_inode(lockres); - osb = OCFS2_SB(inode->i_sb); - - mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); - - status = ocfs2_generic_unblock_lock(osb, - lockres, - requeue, - ocfs2_data_convert_worker); - if (status < 0) - mlog_errno(status); + BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); + BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); - mlog(0, "inode %"MLFu64", requeue = %d\n", - OCFS2_I(inode)->ip_blkno, *requeue); + if (checkpointed) + return 1; - mlog_exit(status); - return status; + ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); + return 0; } -static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, - int *requeue) +static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) { - int status; - struct inode *inode; - - mlog_entry_void(); - - mlog(0, "Unblock lockres %s\n", lockres->l_name); - - inode = ocfs2_lock_res_inode(lockres); + struct inode *inode = ocfs2_lock_res_inode(lockres); - status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), - lockres, - requeue, - NULL); - if (status < 0) - mlog_errno(status); - - mlog_exit(status); - return status; + __ocfs2_stuff_meta_lvb(inode); } - -int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, - int *requeue) +/* + * Does the final reference drop on our dentry lock. Right now this + * happens in the vote thread, but we could choose to simplify the + * dlmglue API and push these off to the ocfs2_wq in the future. + */ +static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres) { - int status; - struct inode *inode; - - mlog_entry_void(); + struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); + ocfs2_dentry_lock_put(osb, dl); +} - inode = ocfs2_lock_res_inode(lockres); +/* + * d_delete() matching dentries before the lock downconvert. + * + * At this point, any process waiting to destroy the + * dentry_lock due to last ref count is stopped by the + * OCFS2_LOCK_QUEUED flag. + * + * We have two potential problems + * + * 1) If we do the last reference drop on our dentry_lock (via dput) + * we'll wind up in ocfs2_release_dentry_lock(), waiting on + * the downconvert to finish. Instead we take an elevated + * reference and push the drop until after we've completed our + * unblock processing. + * + * 2) There might be another process with a final reference, + * waiting on us to finish processing. If this is the case, we + * detect it and exit out - there's no more dentries anyway. + */ +static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, + int blocking) +{ + struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); + struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); + struct dentry *dentry; + unsigned long flags; + int extra_ref = 0; - mlog(0, "unblock inode %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); + /* + * This node is blocking another node from getting a read + * lock. This happens when we've renamed within a + * directory. We've forced the other nodes to d_delete(), but + * we never actually dropped our lock because it's still + * valid. The downconvert code will retain a PR for this node, + * so there's no further work to do. + */ + if (blocking == LKM_PRMODE) + return UNBLOCK_CONTINUE; - status = ocfs2_do_unblock_meta(inode, requeue); - if (status < 0) - mlog_errno(status); + /* + * Mark this inode as potentially orphaned. The code in + * ocfs2_delete_inode() will figure out whether it actually + * needs to be freed or not. + */ + spin_lock(&oi->ip_lock); + oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; + spin_unlock(&oi->ip_lock); - mlog(0, "inode %"MLFu64", requeue = %d\n", - OCFS2_I(inode)->ip_blkno, *requeue); + /* + * Yuck. We need to make sure however that the check of + * OCFS2_LOCK_FREEING and the extra reference are atomic with + * respect to a reference decrement or the setting of that + * flag. + */ + spin_lock_irqsave(&lockres->l_lock, flags); + spin_lock(&dentry_attach_lock); + if (!(lockres->l_flags & OCFS2_LOCK_FREEING) + && dl->dl_count) { + dl->dl_count++; + extra_ref = 1; + } + spin_unlock(&dentry_attach_lock); + spin_unlock_irqrestore(&lockres->l_lock, flags); - mlog_exit(status); - return status; -} + mlog(0, "extra_ref = %d\n", extra_ref); -/* Generic unblock function for any lockres whose private data is an - * ocfs2_super pointer. */ -static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, - int *requeue) -{ - int status; - struct ocfs2_super *osb; + /* + * We have a process waiting on us in ocfs2_dentry_iput(), + * which means we can't have any more outstanding + * aliases. There's no need to do any more work. + */ + if (!extra_ref) + return UNBLOCK_CONTINUE; + + spin_lock(&dentry_attach_lock); + while (1) { + dentry = ocfs2_find_local_alias(dl->dl_inode, + dl->dl_parent_blkno, 1); + if (!dentry) + break; + spin_unlock(&dentry_attach_lock); - mlog_entry_void(); + mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, + dentry->d_name.name); - mlog(0, "Unblock lockres %s\n", lockres->l_name); + /* + * The following dcache calls may do an + * iput(). Normally we don't want that from the + * downconverting thread, but in this case it's ok + * because the requesting node already has an + * exclusive lock on the inode, so it can't be queued + * for a downconvert. + */ + d_delete(dentry); + dput(dentry); - osb = ocfs2_lock_res_super(lockres); + spin_lock(&dentry_attach_lock); + } + spin_unlock(&dentry_attach_lock); - status = ocfs2_generic_unblock_lock(osb, - lockres, - requeue, - NULL); - if (status < 0) - mlog_errno(status); + /* + * If we are the last holder of this dentry lock, there is no + * reason to downconvert so skip straight to the unlock. + */ + if (dl->dl_count == 1) + return UNBLOCK_STOP_POST; - mlog_exit(status); - return status; + return UNBLOCK_CONTINUE_POST; } void ocfs2_process_blocked_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres) { int status; - int requeue = 0; + struct ocfs2_unblock_ctl ctl = {0, 0,}; unsigned long flags; /* Our reference to the lockres in this function can be @@ -2820,7 +2908,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, BUG_ON(!lockres); BUG_ON(!lockres->l_ops); - BUG_ON(!lockres->l_ops->unblock); mlog(0, "lockres %s blocked.\n", lockres->l_name); @@ -2834,21 +2921,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, goto unqueue; spin_unlock_irqrestore(&lockres->l_lock, flags); - status = lockres->l_ops->unblock(lockres, &requeue); + status = ocfs2_unblock_lock(osb, lockres, &ctl); if (status < 0) mlog_errno(status); spin_lock_irqsave(&lockres->l_lock, flags); unqueue: - if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { + if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); } else ocfs2_schedule_blocked_lock(osb, lockres); mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, - requeue ? "yes" : "no"); + ctl.requeue ? "yes" : "no"); spin_unlock_irqrestore(&lockres->l_lock, flags); + if (ctl.unblock_action != UNBLOCK_CONTINUE + && lockres->l_ops->post_unlock) + lockres->l_ops->post_unlock(osb, lockres); + mlog_exit_void(); } @@ -2891,14 +2982,17 @@ void ocfs2_dump_meta_lvb_info(u64 level, mlog(level, "LVB information for %s (called from %s:%u):\n", lockres->l_name, function, line); - mlog(level, "version: %u, clusters: %u\n", - be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); - mlog(level, "size: %"MLFu64", uid %u, gid %u, mode 0x%x\n", - be64_to_cpu(lvb->lvb_isize), be32_to_cpu(lvb->lvb_iuid), - be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode)); - mlog(level, "nlink %u, atime_packed 0x%"MLFx64", " - "ctime_packed 0x%"MLFx64", mtime_packed 0x%"MLFx64"\n", - be16_to_cpu(lvb->lvb_inlink), be64_to_cpu(lvb->lvb_iatime_packed), - be64_to_cpu(lvb->lvb_ictime_packed), - be64_to_cpu(lvb->lvb_imtime_packed)); + mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", + lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), + be32_to_cpu(lvb->lvb_igeneration)); + mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", + (unsigned long long)be64_to_cpu(lvb->lvb_isize), + be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), + be16_to_cpu(lvb->lvb_imode)); + mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " + "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), + (long long)be64_to_cpu(lvb->lvb_iatime_packed), + (long long)be64_to_cpu(lvb->lvb_ictime_packed), + (long long)be64_to_cpu(lvb->lvb_imtime_packed), + be32_to_cpu(lvb->lvb_iattr)); }