X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fdcache.c;h=cbf76e692013281de5852d569896ac1625bd96fb;hb=d4c1b5f91280f0f15d82ddf2156117c300c490b9;hp=a56398970d1e7868eba5dc070faa7f736986a7e3;hpb=ec9397bab20a628530ce3051167d3d0fcc2c1af7;p=linux-2.6.git diff --git a/fs/dcache.c b/fs/dcache.c index a56398970..cbf76e692 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -14,10 +14,11 @@ * the dcache entry is deleted or garbage collected. */ -#include +#include #include #include #include +#include #include #include #include @@ -32,16 +33,16 @@ #include #include -/* #define DCACHE_DEBUG 1 */ -int sysctl_vfs_cache_pressure = 100; +int sysctl_vfs_cache_pressure __read_mostly = 100; +EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; -seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; + __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); -static kmem_cache_t *dentry_cache; +static kmem_cache_t *dentry_cache __read_mostly; #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) @@ -56,9 +57,9 @@ static kmem_cache_t *dentry_cache; #define D_HASHBITS d_hash_shift #define D_HASHMASK d_hash_mask -static unsigned int d_hash_mask; -static unsigned int d_hash_shift; -static struct hlist_head *dentry_hashtable; +static unsigned int d_hash_mask __read_mostly; +static unsigned int d_hash_shift __read_mostly; +static struct hlist_head *dentry_hashtable __read_mostly; static LIST_HEAD(dentry_unused); /* Statistics gathering. */ @@ -68,7 +69,7 @@ struct dentry_stat_t dentry_stat = { static void d_callback(struct rcu_head *head) { - struct dentry * dentry = container_of(head, struct dentry, d_rcu); + struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); if (dname_external(dentry)) kfree(dentry->d_name.name); @@ -83,11 +84,11 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - if (dentry->d_extra_attributes) { - kfree(dentry->d_extra_attributes); - dentry->d_extra_attributes = NULL; - } - call_rcu(&dentry->d_rcu, d_callback); + if (dentry->d_extra_attributes) { + kfree(dentry->d_extra_attributes); + dentry->d_extra_attributes = NULL; + } + call_rcu(&dentry->d_u.d_rcu, d_callback); } /* @@ -95,7 +96,7 @@ static void d_free(struct dentry *dentry) * d_iput() operation if defined. * Called with dcache_lock and per dentry lock held, drops both. */ -static inline void dentry_iput(struct dentry * dentry) +static void dentry_iput(struct dentry * dentry) { struct inode *inode = dentry->d_inode; if (inode) { @@ -103,6 +104,8 @@ static inline void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -159,7 +162,7 @@ repeat: spin_unlock(&dcache_lock); return; } - + /* * AV: ->d_delete() is _NOT_ allowed to block now. */ @@ -192,7 +195,7 @@ kill_it: { list_del(&dentry->d_lru); dentry_stat.nr_unused--; } - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); @@ -324,10 +327,13 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) struct dentry * d_find_alias(struct inode *inode) { - struct dentry *de; - spin_lock(&dcache_lock); - de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + struct dentry *de = NULL; + + if (!list_empty(&inode->i_dentry)) { + spin_lock(&dcache_lock); + de = __d_find_alias(inode, 0); + spin_unlock(&dcache_lock); + } return de; } @@ -337,35 +343,37 @@ struct dentry * d_find_alias(struct inode *inode) */ void d_prune_aliases(struct inode *inode) { - struct list_head *tmp, *head = &inode->i_dentry; + struct dentry *dentry; restart: spin_lock(&dcache_lock); - tmp = head; - while ((tmp = tmp->next) != head) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + spin_lock(&dentry->d_lock); if (!atomic_read(&dentry->d_count)) { __dget_locked(dentry); __d_drop(dentry); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); dput(dentry); goto restart; } + spin_unlock(&dentry->d_lock); } spin_unlock(&dcache_lock); } /* - * Throw away a dentry - free the inode, dput the parent. - * This requires that the LRU list has already been - * removed. + * Throw away a dentry - free the inode, dput the parent. This requires that + * the LRU list has already been removed. + * * Called with dcache_lock, drops it and then regains. + * Called with dentry->d_lock held, drops it. */ -static inline void prune_one_dentry(struct dentry * dentry) +static void prune_one_dentry(struct dentry * dentry) { struct dentry * parent; __d_drop(dentry); - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ dentry_iput(dentry); parent = dentry->d_parent; @@ -378,6 +386,8 @@ static inline void prune_one_dentry(struct dentry * dentry) /** * prune_dcache - shrink the dcache * @count: number of entries to try and free + * @sb: if given, ignore dentries for other superblocks + * which are being unmounted. * * Shrink the dcache. This is done when we need * more memory, or simply when we need to unmount @@ -388,16 +398,29 @@ static inline void prune_one_dentry(struct dentry * dentry) * all the dentries are in use. */ -static void prune_dcache(int count) +static void prune_dcache(int count, struct super_block *sb) { spin_lock(&dcache_lock); for (; count ; count--) { struct dentry *dentry; struct list_head *tmp; + struct rw_semaphore *s_umount; cond_resched_lock(&dcache_lock); tmp = dentry_unused.prev; + if (sb) { + /* Try to find a dentry for this sb, but don't try + * too hard, if they aren't near the tail they will + * be moved down again soon + */ + int skip = count; + while (skip && tmp != &dentry_unused && + list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { + skip--; + tmp = tmp->prev; + } + } if (tmp == &dentry_unused) break; list_del_init(tmp); @@ -423,7 +446,45 @@ static void prune_dcache(int count) spin_unlock(&dentry->d_lock); continue; } - prune_one_dentry(dentry); + /* + * If the dentry is not DCACHED_REFERENCED, it is time + * to remove it from the dcache, provided the super block is + * NULL (which means we are trying to reclaim memory) + * or this dentry belongs to the same super block that + * we want to shrink. + */ + /* + * If this dentry is for "my" filesystem, then I can prune it + * without taking the s_umount lock (I already hold it). + */ + if (sb && dentry->d_sb == sb) { + prune_one_dentry(dentry); + continue; + } + /* + * ...otherwise we need to be sure this filesystem isn't being + * unmounted, otherwise we could race with + * generic_shutdown_super(), and end up holding a reference to + * an inode while the filesystem is unmounted. + * So we try to get s_umount, and make sure s_root isn't NULL. + * (Take a local copy of s_umount to avoid a use-after-free of + * `dentry'). + */ + s_umount = &dentry->d_sb->s_umount; + if (down_read_trylock(s_umount)) { + if (dentry->d_sb->s_root != NULL) { + prune_one_dentry(dentry); + up_read(s_umount); + continue; + } + up_read(s_umount); + } + spin_unlock(&dentry->d_lock); + /* Cannot remove the first dentry, and it isn't appropriate + * to move it to the head of the list, so give up, and try + * later + */ + break; } spin_unlock(&dcache_lock); } @@ -460,25 +521,18 @@ void shrink_dcache_sb(struct super_block * sb) * superblock to the most recent end of the unused list. */ spin_lock(&dcache_lock); - next = dentry_unused.next; - while (next != &dentry_unused) { - tmp = next; - next = tmp->next; + list_for_each_safe(tmp, next, &dentry_unused) { dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; - list_del(tmp); - list_add(tmp, &dentry_unused); + list_move(tmp, &dentry_unused); } /* * Pass two ... free the dentries for this superblock. */ repeat: - next = dentry_unused.next; - while (next != &dentry_unused) { - tmp = next; - next = tmp->next; + list_for_each_safe(tmp, next, &dentry_unused) { dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; @@ -490,11 +544,142 @@ repeat: continue; } prune_one_dentry(dentry); + cond_resched_lock(&dcache_lock); goto repeat; } spin_unlock(&dcache_lock); } +/* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + /* detach this root from the system */ + spin_lock(&dcache_lock); + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + __d_drop(dentry); + spin_unlock(&dcache_lock); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dcache_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + if (!list_empty(&loop->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&loop->d_lru); + } + + __d_drop(loop); + cond_resched_lock(&dcache_lock); + } + spin_unlock(&dcache_lock); + + /* move to the first child */ + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + if (atomic_read(&dentry->d_count) != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + atomic_read(&dentry->d_count), + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + parent = dentry->d_parent; + if (parent == dentry) + parent = NULL; + else + atomic_dec(&parent->d_count); + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + + inode = dentry->d_inode; + if (inode) { + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + + dentry = parent; + + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + atomic_dec(&dentry->d_count); + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_empty(&sb->s_anon)) { + dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + /* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs @@ -522,7 +707,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; /* Have we found a mount point ? */ if (d_mountpoint(dentry)) @@ -536,7 +721,7 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -553,6 +738,13 @@ positive: * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue * searching. + * + * It returns zero iff there are no unused children, + * otherwise it returns the number of children moved to + * the end of the unused list. This may not be the total + * number of unused children, because select_parent can + * drop the lock and return early due to latency + * constraints. */ static int select_parent(struct dentry * parent) { @@ -566,7 +758,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (!list_empty(&dentry->d_lru)) { @@ -578,19 +770,24 @@ resume: * of the unused list for prune_dcache */ if (!atomic_read(&dentry->d_count)) { - list_add(&dentry->d_lru, dentry_unused.prev); + list_add_tail(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; found++; } + + /* + * We can return to the caller if we have found some (this + * ensures forward progress). We'll be coming back to find + * the rest. + */ + if (found && need_resched()) + goto out; + /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, found); -#endif goto repeat; } } @@ -598,14 +795,11 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found); * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", -this_parent->d_parent->d_name.name, this_parent->d_name.name, found); -#endif goto resume; } +out: spin_unlock(&dcache_lock); return found; } @@ -622,46 +816,7 @@ void shrink_dcache_parent(struct dentry * parent) int found; while ((found = select_parent(parent)) != 0) - prune_dcache(found); -} - -/** - * shrink_dcache_anon - further prune the cache - * @head: head of d_hash list of dentries to prune - * - * Prune the dentries that are anonymous - * - * parsing d_hash list does not hlist_for_each_rcu() as it - * done under dcache_lock. - * - */ -void shrink_dcache_anon(struct hlist_head *head) -{ - struct hlist_node *lp; - int found; - do { - found = 0; - spin_lock(&dcache_lock); - hlist_for_each(lp, head) { - struct dentry *this = hlist_entry(lp, struct dentry, d_hash); - if (!list_empty(&this->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&this->d_lru); - } - - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache - */ - if (!atomic_read(&this->d_count)) { - list_add_tail(&this->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - found++; - } - } - spin_unlock(&dcache_lock); - prune_dcache(found); - } while(found); + prune_dcache(found, parent->d_sb); } /* @@ -676,12 +831,12 @@ void shrink_dcache_anon(struct hlist_head *head) * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(int nr, unsigned int gfp_mask) +static int shrink_dcache_memory(int nr, gfp_t gfp_mask) { if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; - prune_dcache(nr); + prune_dcache(nr, NULL); } return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } @@ -723,7 +878,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) atomic_set(&dentry->d_count, 1); dentry->d_flags = DCACHE_UNHASHED; - dentry->d_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; @@ -731,8 +886,9 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_fsdata = NULL; dentry->d_extra_attributes = NULL; dentry->d_mounted = 0; +#ifdef CONFIG_PROFILING dentry->d_cookie = NULL; - dentry->d_bucket = NULL; +#endif INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -742,18 +898,28 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_parent = dget(parent); dentry->d_sb = parent->d_sb; } else { - INIT_LIST_HEAD(&dentry->d_child); + INIT_LIST_HEAD(&dentry->d_u.d_child); } spin_lock(&dcache_lock); if (parent) - list_add(&dentry->d_child, &parent->d_subdirs); + list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; spin_unlock(&dcache_lock); return dentry; } +struct dentry *d_alloc_name(struct dentry *parent, const char *name) +{ + struct qstr q; + + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + return d_alloc(parent, &q); +} + /** * d_instantiate - fill in inode information for a dentry * @entry: dentry to complete @@ -771,15 +937,88 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) void d_instantiate(struct dentry *entry, struct inode * inode) { - if (!list_empty(&entry->d_alias)) BUG(); + BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); if (inode) list_add(&entry->d_alias, &inode->i_dentry); entry->d_inode = inode; + fsnotify_d_instantiate(entry, inode); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } +/** + * d_instantiate_unique - instantiate a non-aliased dentry + * @entry: dentry to instantiate + * @inode: inode to attach to this dentry + * + * Fill in inode information in the entry. On success, it returns NULL. + * If an unhashed alias of "entry" already exists, then we return the + * aliased dentry instead and drop one reference to inode. + * + * Note that in order to avoid conflicts with rename() etc, the caller + * had better be holding the parent directory semaphore. + * + * This also assumes that the inode count has been incremented + * (or otherwise set) by the caller to indicate that it is now + * in use by the dcache. + */ +static struct dentry *__d_instantiate_unique(struct dentry *entry, + struct inode *inode) +{ + struct dentry *alias; + int len = entry->d_name.len; + const char *name = entry->d_name.name; + unsigned int hash = entry->d_name.hash; + + if (!inode) { + entry->d_inode = NULL; + return NULL; + } + + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + struct qstr *qstr = &alias->d_name; + + if (qstr->hash != hash) + continue; + if (alias->d_parent != entry->d_parent) + continue; + if (qstr->len != len) + continue; + if (memcmp(qstr->name, name, len)) + continue; + dget_locked(alias); + return alias; + } + + list_add(&entry->d_alias, &inode->i_dentry); + entry->d_inode = inode; + fsnotify_d_instantiate(entry, inode); + return NULL; +} + +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *result; + + BUG_ON(!list_empty(&entry->d_alias)); + + spin_lock(&dcache_lock); + result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_lock); + + if (!result) { + security_d_instantiate(entry, inode); + return NULL; + } + + BUG_ON(!d_unhashed(result)); + iput(inode); + return result; +} + +EXPORT_SYMBOL(d_instantiate_unique); + /** * d_alloc_root - allocate root dentry * @root_inode: inode to allocate the root for @@ -861,12 +1100,6 @@ struct dentry * d_alloc_anon(struct inode *inode) res->d_sb = inode->i_sb; res->d_parent = res; res->d_inode = inode; - - /* - * Set d_bucket to an "impossible" bucket address so - * that d_move() doesn't get a false positive - */ - res->d_bucket = NULL; res->d_flags |= DCACHE_DISCONNECTED; res->d_flags &= ~DCACHE_UNHASHED; list_add(&res->d_alias, &inode->i_dentry); @@ -910,6 +1143,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); + fsnotify_d_instantiate(new, inode); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_rehash(dentry); @@ -919,6 +1153,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) /* d_instantiate takes dcache_lock, so we do it by hand */ list_add(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; + fsnotify_d_instantiate(dentry, inode); spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); @@ -980,17 +1215,13 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct hlist_head *head = d_hash(parent,hash); struct dentry *found = NULL; struct hlist_node *node; + struct dentry *dentry; rcu_read_lock(); - hlist_for_each_rcu(node, head) { - struct dentry *dentry; + hlist_for_each_entry_rcu(dentry, node, head, d_hash) { struct qstr *qstr; - dentry = hlist_entry(node, struct dentry, d_hash); - - smp_rmb(); - if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) @@ -998,13 +1229,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) spin_lock(&dentry->d_lock); - /* - * If lookup ends up in a different bucket due to concurrent - * rename, fail it - */ - if (unlikely(dentry->d_bucket != head)) - goto terminate; - /* * Recheck the dentry after taking the lock - d_move may have * changed things. Don't bother checking the hash because we're @@ -1013,7 +1237,11 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) if (dentry->d_parent != parent) goto next; - qstr = rcu_dereference(&dentry->d_name); + /* + * It is safe to compare names since d_move() cannot + * change the qstr (protected by d_lock). + */ + qstr = &dentry->d_name; if (parent->d_op && parent->d_op->d_compare) { if (parent->d_op->d_compare(parent, qstr, name)) goto next; @@ -1028,7 +1256,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) atomic_inc(&dentry->d_count); found = dentry; } -terminate: spin_unlock(&dentry->d_lock); break; next: @@ -1039,6 +1266,32 @@ next: return found; } +/** + * d_hash_and_lookup - hash the qstr then search for a dentry + * @dir: Directory to search in + * @name: qstr of name we wish to find + * + * On hash failure or on lookup failure NULL is returned. + */ +struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) +{ + struct dentry *dentry = NULL; + + /* + * Check for a fs-specific hash function. Note that we must + * calculate the standard hash first, as the d_op->d_hash() + * routine may choose to leave the hash value unchanged. + */ + name->hash = full_name_hash(name->name, name->len); + if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_op->d_hash(dir, name) < 0) + goto out; + } + dentry = d_lookup(dir, name); +out: + return dentry; +} + /** * d_validate - verify dentry provided from insecure source * @dentry: The dentry alleged to be valid child of @dparent @@ -1066,7 +1319,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) spin_lock(&dcache_lock); base = d_hash(dparent, dentry->d_name.hash); hlist_for_each(lhp,base) { - /* hlist_for_each_rcu() not required for d_hash list + /* hlist_for_each_entry_rcu() not required for d_hash list * as it is parsed under dcache_lock */ if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { @@ -1103,13 +1356,19 @@ out: void d_delete(struct dentry * dentry) { + int isdir = 0; /* * Are we the only user? */ spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); + isdir = S_ISDIR(dentry->d_inode->i_mode); if (atomic_read(&dentry->d_count) == 1) { dentry_iput(dentry); + fsnotify_nameremove(dentry, isdir); + + /* remove this and other inotify debug checks after 2.6.18 */ + dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; return; } @@ -1118,6 +1377,20 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + + fsnotify_nameremove(dentry, isdir); +} + +static void __d_rehash(struct dentry * entry, struct hlist_head *list) +{ + + entry->d_flags &= ~DCACHE_UNHASHED; + hlist_add_head_rcu(&entry->d_hash, list); +} + +static void _d_rehash(struct dentry * entry) +{ + __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); } /** @@ -1129,14 +1402,10 @@ void d_delete(struct dentry * dentry) void d_rehash(struct dentry * entry) { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); - entry->d_flags &= ~DCACHE_UNHASHED; + _d_rehash(entry); spin_unlock(&entry->d_lock); - entry->d_bucket = list; - hlist_add_head_rcu(&entry->d_hash, list); spin_unlock(&dcache_lock); } @@ -1214,6 +1483,8 @@ static void switch_names(struct dentry *dentry, struct dentry *target) void d_move(struct dentry * dentry, struct dentry * target) { + struct hlist_head *list; + if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -1224,42 +1495,40 @@ void d_move(struct dentry * dentry, struct dentry * target) */ if (target < dentry) { spin_lock(&target->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&dentry->d_lock); - spin_lock(&target->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); } /* Move the dentry to the target hash queue, if on different bucket */ if (dentry->d_flags & DCACHE_UNHASHED) goto already_unhashed; - if (dentry->d_bucket != target->d_bucket) { - hlist_del_rcu(&dentry->d_hash); + + hlist_del_rcu(&dentry->d_hash); + already_unhashed: - dentry->d_bucket = target->d_bucket; - hlist_add_head_rcu(&dentry->d_hash, target->d_bucket); - dentry->d_flags &= ~DCACHE_UNHASHED; - } + list = d_hash(target->d_parent, target->d_name.hash); + __d_rehash(dentry, list); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - /* flush any possible attributes */ - if (dentry->d_extra_attributes) { - kfree(dentry->d_extra_attributes); - dentry->d_extra_attributes = NULL; - } - if (target->d_extra_attributes) { - kfree(target->d_extra_attributes); - target->d_extra_attributes = NULL; - } + /* flush any possible attributes */ + if (dentry->d_extra_attributes) { + kfree(dentry->d_extra_attributes); + dentry->d_extra_attributes = NULL; + } + if (target->d_extra_attributes) { + kfree(target->d_extra_attributes); + target->d_extra_attributes = NULL; + } - list_del(&dentry->d_child); - list_del(&target->d_child); + list_del(&dentry->d_u.d_child); + list_del(&target->d_u.d_child); /* Switch the names.. */ switch_names(dentry, target); - smp_wmb(); do_switch(dentry->d_name.len, target->d_name.len); do_switch(dentry->d_name.hash, target->d_name.hash); @@ -1267,21 +1536,136 @@ already_unhashed: if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_child); + INIT_LIST_HEAD(&target->d_u.d_child); } else { do_switch(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_child, &target->d_parent->d_subdirs); + list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); spin_unlock(&target->d_lock); + fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); spin_unlock(&dcache_lock); } +/* + * Prepare an anonymous dentry for life in the superblock's dentry tree as a + * named dentry in place of the dentry to be replaced. + */ +static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) +{ + struct dentry *dparent, *aparent; + + switch_names(dentry, anon); + do_switch(dentry->d_name.len, anon->d_name.len); + do_switch(dentry->d_name.hash, anon->d_name.hash); + + dparent = dentry->d_parent; + aparent = anon->d_parent; + + dentry->d_parent = (aparent == anon) ? dentry : aparent; + list_del(&dentry->d_u.d_child); + if (!IS_ROOT(dentry)) + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&dentry->d_u.d_child); + + anon->d_parent = (dparent == dentry) ? anon : dparent; + list_del(&anon->d_u.d_child); + if (!IS_ROOT(anon)) + list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&anon->d_u.d_child); + + anon->d_flags &= ~DCACHE_DISCONNECTED; +} + +/** + * d_materialise_unique - introduce an inode into the tree + * @dentry: candidate dentry + * @inode: inode to bind to the dentry, to which aliases may be attached + * + * Introduces an dentry into the tree, substituting an extant disconnected + * root directory alias in its place if there is one + */ +struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) +{ + struct dentry *alias, *actual; + + BUG_ON(!d_unhashed(dentry)); + + spin_lock(&dcache_lock); + + if (!inode) { + actual = dentry; + dentry->d_inode = NULL; + goto found_lock; + } + + /* See if a disconnected directory already exists as an anonymous root + * that we should splice into the tree instead */ + if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { + spin_lock(&alias->d_lock); + + /* Is this a mountpoint that we could splice into our tree? */ + if (IS_ROOT(alias)) + goto connect_mountpoint; + + if (alias->d_name.len == dentry->d_name.len && + alias->d_parent == dentry->d_parent && + memcmp(alias->d_name.name, + dentry->d_name.name, + dentry->d_name.len) == 0) + goto replace_with_alias; + + spin_unlock(&alias->d_lock); + + /* Doh! Seem to be aliasing directories for some reason... */ + dput(alias); + } + + /* Add a unique reference */ + actual = __d_instantiate_unique(dentry, inode); + if (!actual) + actual = dentry; + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; + +found_lock: + spin_lock(&actual->d_lock); +found: + _d_rehash(actual); + spin_unlock(&actual->d_lock); + spin_unlock(&dcache_lock); + + if (actual == dentry) { + security_d_instantiate(dentry, inode); + return NULL; + } + + iput(inode); + return actual; + + /* Convert the anonymous/root alias into an ordinary dentry */ +connect_mountpoint: + __d_materialise_dentry(dentry, alias); + + /* Replace the candidate dentry with the alias in the tree */ +replace_with_alias: + __d_drop(alias); + actual = alias; + goto found; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); + goto shouldnt_be_hashed; +} + /** * d_path - return the path of a dentry * @dentry: dentry to report @@ -1478,7 +1862,6 @@ int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) struct dentry * saved = new_dentry; unsigned long seq; - result = 0; /* need rcu_readlock to protect against the d_parent trashing due to * d_move */ @@ -1486,6 +1869,7 @@ int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) do { /* for restarting inner loop in case of seq retry */ new_dentry = saved; + result = 0; seq = read_seqbegin(&rename_lock); for (;;) { if (new_dentry != old_dentry) { @@ -1515,7 +1899,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (d_unhashed(dentry)||!dentry->d_inode) continue; @@ -1526,7 +1910,7 @@ resume: atomic_dec(&dentry->d_count); } if (this_parent != root) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; atomic_dec(&this_parent->d_count); this_parent = this_parent->d_parent; goto resume; @@ -1553,26 +1937,12 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name) struct dentry * dentry; ino_t ino = 0; - /* - * Check for a fs-specific hash function. Note that we must - * calculate the standard hash first, as the d_op->d_hash() - * routine may choose to leave the hash value unchanged. - */ - name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) - { - if (dir->d_op->d_hash(dir, name) != 0) - goto out; - } - - dentry = d_lookup(dir, name); - if (dentry) - { + dentry = d_hash_and_lookup(dir, name); + if (dentry) { if (dentry->d_inode) ino = dentry->d_inode->i_ino; dput(dentry); } -out: return ino; } @@ -1590,14 +1960,21 @@ static void __init dcache_init_early(void) { int loop; + /* If hashes are distributed across NUMA nodes, defer + * hash allocation until vmalloc space is available. + */ + if (hashdist) + return; + dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_head), dhash_entries, 13, - 0, + HASH_EARLY, &d_hash_shift, - &d_hash_mask); + &d_hash_mask, + 0); for (loop = 0; loop < (1 << d_hash_shift); loop++) INIT_HLIST_HEAD(&dentry_hashtable[loop]); @@ -1622,6 +1999,8 @@ EXPORT_SYMBOL_GPL(flush_dentry_attributes); static void __init dcache_init(unsigned long mempages) { + int loop; + /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature @@ -1630,17 +2009,35 @@ static void __init dcache_init(unsigned long mempages) dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); + + /* Hash may have been set up in dcache_init_early */ + if (!hashdist) + return; + + dentry_hashtable = + alloc_large_system_hash("Dentry cache", + sizeof(struct hlist_head), + dhash_entries, + 13, + 0, + &d_hash_shift, + &d_hash_mask, + 0); + + for (loop = 0; loop < (1 << d_hash_shift); loop++) + INIT_HLIST_HEAD(&dentry_hashtable[loop]); } /* SLAB cache for __getname() consumers */ -kmem_cache_t *names_cachep; +kmem_cache_t *names_cachep __read_mostly; /* SLAB cache for file structures */ -kmem_cache_t *filp_cachep; +kmem_cache_t *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); @@ -1667,7 +2064,7 @@ void __init vfs_caches_init(unsigned long mempages) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); dcache_init(mempages); inode_init(mempages); @@ -1686,6 +2083,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL_GPL(d_materialise_unique); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash);