* the dcache entry is deleted or garbage collected.
*/
-#include <linux/config.h>
+#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include <linux/vs_limit.h>
+#include "internal.h"
-/* #define DCACHE_DEBUG 1 */
-int sysctl_vfs_cache_pressure = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 100;
+EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
-spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(dcache_lock);
-static kmem_cache_t *dentry_cache;
+static struct kmem_cache *dentry_cache __read_mostly;
#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
#define D_HASHBITS d_hash_shift
#define D_HASHMASK d_hash_mask
-static unsigned int d_hash_mask;
-static unsigned int d_hash_shift;
-static struct hlist_head *dentry_hashtable;
+static unsigned int d_hash_mask __read_mostly;
+static unsigned int d_hash_shift __read_mostly;
+static struct hlist_head *dentry_hashtable __read_mostly;
static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
.age_limit = 45,
};
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
{
- struct dentry * dentry = container_of(head, struct dentry, d_rcu);
-
if (dname_external(dentry))
kfree(dentry->d_name.name);
kmem_cache_free(dentry_cache, dentry);
}
+static void d_callback(struct rcu_head *head)
+{
+ struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
+ __d_free(dentry);
+}
+
/*
* no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
* inside dcache_lock.
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- call_rcu(&dentry->d_rcu, d_callback);
+ /* if dentry was never inserted into hash, immediate free is OK */
+ if (dentry->d_hash.pprev == NULL)
+ __d_free(dentry);
+ else
+ call_rcu(&dentry->d_u.d_rcu, d_callback);
}
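+/*
+ * Editorial note (an assumption about the rationale): lockless walkers
+ * such as __d_lookup() below traverse the hash chains under
+ * rcu_read_lock() alone, so any dentry that has ever been hashed may
+ * still be visible to them and must be freed through call_rcu().  A
+ * dentry whose d_hash.pprev is still NULL was never published on a
+ * chain, which is what makes the immediate __d_free() above safe.
+ */
+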
/*
* d_iput() operation if defined.
* Called with dcache_lock and per dentry lock held, drops both.
*/
-static inline void dentry_iput(struct dentry * dentry)
+static void dentry_iput(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
if (inode) {
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
if (!dentry)
return;
+ vx_dentry_dec(dentry);
repeat:
+ if (atomic_read(&dentry->d_count) == 1)
+ might_sleep();
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;
spin_unlock(&dcache_lock);
return;
}
-
+
+ vx_dentry_dec(dentry);
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
list_del(&dentry->d_lru);
dentry_stat.nr_unused--;
}
- list_del(&dentry->d_child);
+ list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
/* drops the locks; at that point nobody can reach this dentry */
dentry_iput(dentry);
if (!list_empty(&dentry->d_lru)) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
+ vx_dentry_inc(dentry);
}
return dentry;
}
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
+ * @want_discon: flag, used by d_splice_alias, to request
+ * that only a DISCONNECTED alias be returned.
*
- * If inode has a hashed alias - acquire the reference to alias and
- * return it. Otherwise return NULL. Notice that if inode is a directory
- * there can be only one alias and it can be unhashed only if it has
- * no children.
+ * If inode has a hashed alias, or is a directory and has any alias,
+ * acquire the reference to alias and return it. Otherwise return NULL.
+ * Notice that if inode is a directory there can be only one alias and
+ * it can be unhashed only if it has no children, or if it is the root
+ * of a filesystem.
*
- * If the inode has a DCACHE_DISCONNECTED alias, then prefer
- * any other hashed alias over that one.
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
+ * any other hashed alias over that one unless @want_discon is set,
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
*/
-struct dentry * d_find_alias(struct inode *inode)
+static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
struct list_head *head, *next, *tmp;
struct dentry *alias, *discon_alias=NULL;
- spin_lock(&dcache_lock);
head = &inode->i_dentry;
next = inode->i_dentry.next;
while (next != head) {
next = tmp->next;
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
- if (!d_unhashed(alias)) {
- if (alias->d_flags & DCACHE_DISCONNECTED)
+ if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (IS_ROOT(alias) &&
+ (alias->d_flags & DCACHE_DISCONNECTED))
discon_alias = alias;
- else {
+ else if (!want_discon) {
__dget_locked(alias);
- spin_unlock(&dcache_lock);
return alias;
}
}
}
if (discon_alias)
__dget_locked(discon_alias);
- spin_unlock(&dcache_lock);
return discon_alias;
}
+struct dentry * d_find_alias(struct inode *inode)
+{
+ struct dentry *de = NULL;
+
+ if (!list_empty(&inode->i_dentry)) {
+ spin_lock(&dcache_lock);
+ de = __d_find_alias(inode, 0);
+ spin_unlock(&dcache_lock);
+ }
+ return de;
+}
+
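+/*
+ * Usage sketch (hypothetical caller, not part of this change): the
+ * returned alias carries a reference taken via __dget_locked(), so the
+ * caller is expected to drop it:
+ *
+ *	struct dentry *alias = d_find_alias(inode);
+ *	if (alias) {
+ *		... inspect or use the alias ...
+ *		dput(alias);
+ *	}
+ */
+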
/*
* Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
*/
void d_prune_aliases(struct inode *inode)
{
- struct list_head *tmp, *head = &inode->i_dentry;
+ struct dentry *dentry;
restart:
spin_lock(&dcache_lock);
- tmp = head;
- while ((tmp = tmp->next) != head) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
__dget_locked(dentry);
__d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
dput(dentry);
goto restart;
}
+ spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
}
/*
- * Throw away a dentry - free the inode, dput the parent.
- * This requires that the LRU list has already been
- * removed.
+ * Throw away a dentry - free the inode, dput the parent. This requires that
+ * the LRU list has already been removed.
+ *
* Called with dcache_lock, drops it and then regains.
+ * Called with dentry->d_lock held, drops it.
*/
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
{
struct dentry * parent;
__d_drop(dentry);
- list_del(&dentry->d_child);
+ list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
dentry_iput(dentry);
parent = dentry->d_parent;
/**
* prune_dcache - shrink the dcache
* @count: number of entries to try and free
+ * @sb: if given, ignore dentries for other superblocks
+ * which are being unmounted.
*
* Shrink the dcache. This is done when we need
* more memory, or simply when we need to unmount
* all the dentries are in use.
*/
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct super_block *sb)
{
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
+ struct rw_semaphore *s_umount;
+
+ cond_resched_lock(&dcache_lock);
tmp = dentry_unused.prev;
+ if (sb) {
+ /* Try to find a dentry for this sb, but don't try
+ * too hard; if they aren't near the tail they will
+ * be moved down again soon
+ */
+ int skip = count;
+ while (skip && tmp != &dentry_unused &&
+ list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
+ skip--;
+ tmp = tmp->prev;
+ }
+ }
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
spin_unlock(&dentry->d_lock);
continue;
}
- prune_one_dentry(dentry);
+ /*
+ * If the dentry is not DCACHE_REFERENCED, it is time
+ * to remove it from the dcache, provided the super block is
+ * NULL (which means we are trying to reclaim memory)
+ * or this dentry belongs to the same super block that
+ * we want to shrink.
+ */
+ /*
+ * If this dentry is for "my" filesystem, then I can prune it
+ * without taking the s_umount lock (I already hold it).
+ */
+ if (sb && dentry->d_sb == sb) {
+ prune_one_dentry(dentry);
+ continue;
+ }
+ /*
+ * ...otherwise we need to be sure this filesystem isn't being
+ * unmounted, or we could race with
+ * generic_shutdown_super(), and end up holding a reference to
+ * an inode while the filesystem is unmounted.
+ * So we try to get s_umount, and make sure s_root isn't NULL.
+ * (Take a local copy of s_umount to avoid a use-after-free of
+ * `dentry').
+ */
+ s_umount = &dentry->d_sb->s_umount;
+ if (down_read_trylock(s_umount)) {
+ if (dentry->d_sb->s_root != NULL) {
+ prune_one_dentry(dentry);
+ up_read(s_umount);
+ continue;
+ }
+ up_read(s_umount);
+ }
+ spin_unlock(&dentry->d_lock);
+ /*
+ * Insert dentry at the head of the list as inserting at the
+ * tail leads to a cycle.
+ */
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
}
spin_unlock(&dcache_lock);
}
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- list_del(tmp);
- list_add(tmp, &dentry_unused);
+ list_move(tmp, &dentry_unused);
}
/*
* Pass two ... free the dentries for this superblock.
*/
repeat:
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
continue;
}
prune_one_dentry(dentry);
+ cond_resched_lock(&dcache_lock);
goto repeat;
}
spin_unlock(&dcache_lock);
}
+/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ * locking
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+ struct dentry *parent;
+ unsigned detached = 0;
+
+ BUG_ON(!IS_ROOT(dentry));
+
+ /* detach this root from the system */
+ spin_lock(&dcache_lock);
+ if (!list_empty(&dentry->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&dentry->d_lru);
+ }
+ __d_drop(dentry);
+ spin_unlock(&dcache_lock);
+
+ for (;;) {
+ /* descend to the first leaf in the current subtree */
+ while (!list_empty(&dentry->d_subdirs)) {
+ struct dentry *loop;
+
+ /* this is a branch with children - detach all of them
+ * from the system in one go */
+ spin_lock(&dcache_lock);
+ list_for_each_entry(loop, &dentry->d_subdirs,
+ d_u.d_child) {
+ if (!list_empty(&loop->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&loop->d_lru);
+ }
+
+ __d_drop(loop);
+ cond_resched_lock(&dcache_lock);
+ }
+ spin_unlock(&dcache_lock);
+
+ /* move to the first child */
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+ }
+
+ /* consume the dentries from this leaf up through its parents
+ * until we find one with children or run out altogether */
+ do {
+ struct inode *inode;
+
+ if (atomic_read(&dentry->d_count) != 0) {
+ printk(KERN_ERR
+ "BUG: Dentry %p{i=%lx,n=%s}"
+ " still in use (%d)"
+ " [unmount of %s %s]\n",
+ dentry,
+ dentry->d_inode ?
+ dentry->d_inode->i_ino : 0UL,
+ dentry->d_name.name,
+ atomic_read(&dentry->d_count),
+ dentry->d_sb->s_type->name,
+ dentry->d_sb->s_id);
+ BUG();
+ }
+
+ parent = dentry->d_parent;
+ if (parent == dentry)
+ parent = NULL;
+ else
+ atomic_dec(&parent->d_count);
+
+ list_del(&dentry->d_u.d_child);
+ detached++;
+
+ inode = dentry->d_inode;
+ if (inode) {
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias);
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode);
+ else
+ iput(inode);
+ }
+
+ d_free(dentry);
+
+ /* finished when we fall off the top of the tree,
+ * otherwise we ascend to the parent and move to the
+ * next sibling if there is one */
+ if (!parent)
+ goto out;
+
+ dentry = parent;
+
+ } while (list_empty(&dentry->d_subdirs));
+
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+ }
+out:
+ /* several dentries were freed, need to correct nr_dentry */
+ spin_lock(&dcache_lock);
+ dentry_stat.nr_dentry -= detached;
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ * removing the dentry from the system lists and hashes because:
+ * - the superblock is detached from all mountings and open files, so the
+ * dentry trees will not be rearranged by the VFS
+ * - s_umount is write-locked, so the memory pressure shrinker will ignore
+ * any dentries belonging to this superblock that it comes across
+ * - the filesystem itself is no longer permitted to rearrange the dentries
+ * in this superblock
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+ struct dentry *dentry;
+
+ if (down_read_trylock(&sb->s_umount))
+ BUG();
+
+ dentry = sb->s_root;
+ sb->s_root = NULL;
+ atomic_dec(&dentry->d_count);
+ shrink_dcache_for_umount_subtree(dentry);
+
+ while (!hlist_empty(&sb->s_anon)) {
+ dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+ shrink_dcache_for_umount_subtree(dentry);
+ }
+}
+
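+/*
+ * Expected call site (an assumption, suggested by the
+ * generic_shutdown_super() race discussed in prune_dcache() above): the
+ * unmount path already holds s_umount for writing, which is exactly what
+ * the down_read_trylock() BUG check at the top of
+ * shrink_dcache_for_umount() asserts.  Roughly:
+ *
+ *	down_write(&sb->s_umount);
+ *	...
+ *	if (sb->s_root)
+ *		shrink_dcache_for_umount(sb);
+ */
+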
/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
/* Have we found a mount point ? */
if (d_mountpoint(dentry))
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
goto resume;
}
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
* searching.
+ *
+ * It returns zero iff there are no unused children;
+ * otherwise it returns the number of children moved to
+ * the end of the unused list. This may not be the total
+ * number of unused children, because select_parent can
+ * drop the lock and return early due to latency
+ * constraints.
*/
static int select_parent(struct dentry * parent)
{
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
if (!list_empty(&dentry->d_lru)) {
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add_tail(&dentry->d_lru, &dentry_unused);
dentry_stat.nr_unused++;
found++;
}
+
+ /*
+ * We can return to the caller if we have found some (this
+ * ensures forward progress). We'll be coming back to find
+ * the rest.
+ */
+ if (found && need_resched())
+ goto out;
+
/*
* Descend a level if the d_subdirs list is non-empty.
*/
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, found);
-#endif
goto repeat;
}
}
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
-this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
-#endif
goto resume;
}
+out:
spin_unlock(&dcache_lock);
return found;
}
int found;
while ((found = select_parent(parent)) != 0)
- prune_dcache(found);
-}
-
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not read_barrier_depends() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct hlist_head *head)
-{
- struct hlist_node *lp;
- int found;
- do {
- found = 0;
- spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
-
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
- spin_unlock(&dcache_lock);
- prune_dcache(found);
- } while(found);
+ prune_dcache(found, parent->d_sb);
}
/*
*
* In this case we return -1 to tell the caller that we bailed.
*/
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr);
+ prune_dcache(nr, NULL);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
struct dentry *dentry;
char *dname;
+ if (!vx_dentry_avail(1))
+ return NULL;
+
dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
- dentry->d_lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
+#ifdef CONFIG_PROFILING
dentry->d_cookie = NULL;
- dentry->d_bucket = NULL;
+#endif
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
} else {
- INIT_LIST_HEAD(&dentry->d_child);
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
}
spin_lock(&dcache_lock);
if (parent)
- list_add(&dentry->d_child, &parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
dentry_stat.nr_dentry++;
+ vx_dentry_inc(dentry);
spin_unlock(&dcache_lock);
return dentry;
}
+struct dentry *d_alloc_name(struct dentry *parent, const char *name)
+{
+ struct qstr q;
+
+ q.name = name;
+ q.len = strlen(name);
+ q.hash = full_name_hash(q.name, q.len);
+ return d_alloc(parent, &q);
+}
+
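+/*
+ * Usage sketch (hypothetical caller and name): d_alloc_name() spares
+ * callers that only have a C string from open-coding the qstr setup:
+ *
+ *	struct dentry *child = d_alloc_name(parent, "status");
+ *	if (!child)
+ *		return -ENOMEM;
+ */
+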
/**
* d_instantiate - fill in inode information for a dentry
* @entry: dentry to complete
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- if (!list_empty(&entry->d_alias)) BUG();
+ BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
entry->d_inode = inode;
+ fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
}
+/**
+ * d_instantiate_unique - instantiate a non-aliased dentry
+ * @entry: dentry to instantiate
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry. On success, it returns NULL.
+ * If an unhashed alias of "entry" already exists, then we return the
+ * aliased dentry instead and drop one reference to inode.
+ *
+ * Note that in order to avoid conflicts with rename() etc, the caller
+ * had better be holding the parent directory semaphore.
+ *
+ * This also assumes that the inode count has been incremented
+ * (or otherwise set) by the caller to indicate that it is now
+ * in use by the dcache.
+ */
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+ struct inode *inode)
+{
+ struct dentry *alias;
+ int len = entry->d_name.len;
+ const char *name = entry->d_name.name;
+ unsigned int hash = entry->d_name.hash;
+
+ if (!inode) {
+ entry->d_inode = NULL;
+ return NULL;
+ }
+
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ struct qstr *qstr = &alias->d_name;
+
+ if (qstr->hash != hash)
+ continue;
+ if (alias->d_parent != entry->d_parent)
+ continue;
+ if (qstr->len != len)
+ continue;
+ if (memcmp(qstr->name, name, len))
+ continue;
+ dget_locked(alias);
+ return alias;
+ }
+
+ list_add(&entry->d_alias, &inode->i_dentry);
+ entry->d_inode = inode;
+ fsnotify_d_instantiate(entry, inode);
+ return NULL;
+}
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+ struct dentry *result;
+
+ BUG_ON(!list_empty(&entry->d_alias));
+
+ spin_lock(&dcache_lock);
+ result = __d_instantiate_unique(entry, inode);
+ spin_unlock(&dcache_lock);
+
+ if (!result) {
+ security_d_instantiate(entry, inode);
+ return NULL;
+ }
+
+ BUG_ON(!d_unhashed(result));
+ iput(inode);
+ return result;
+}
+
+EXPORT_SYMBOL(d_instantiate_unique);
+
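+/*
+ * Usage sketch (hypothetical caller): when a non-NULL alias comes back,
+ * the inode reference has already been dropped by the iput() above, so
+ * the caller only has to switch dentries:
+ *
+ *	struct dentry *alias = d_instantiate_unique(dentry, inode);
+ *	if (alias) {
+ *		dput(dentry);
+ *		dentry = alias;
+ *	}
+ */
+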
/**
* d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for
tmp->d_parent = tmp; /* make sure dput doesn't croak */
spin_lock(&dcache_lock);
- if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
- /* A directory can only have one dentry.
- * This (now) has one, so use it.
- */
- res = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- __dget_locked(res);
- } else {
+ res = __d_find_alias(inode, 0);
+ if (!res) {
/* attach a disconnected dentry */
res = tmp;
tmp = NULL;
- if (res) {
- spin_lock(&res->d_lock);
- res->d_sb = inode->i_sb;
- res->d_parent = res;
- res->d_inode = inode;
+ spin_lock(&res->d_lock);
+ res->d_sb = inode->i_sb;
+ res->d_parent = res;
+ res->d_inode = inode;
+ res->d_flags |= DCACHE_DISCONNECTED;
+ res->d_flags &= ~DCACHE_UNHASHED;
+ list_add(&res->d_alias, &inode->i_dentry);
+ hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
+ spin_unlock(&res->d_lock);
- /*
- * Set d_bucket to an "impossible" bucket address so
- * that d_move() doesn't get a false positive
- */
- res->d_bucket = NULL;
- res->d_flags |= DCACHE_DISCONNECTED;
- res->d_flags &= ~DCACHE_UNHASHED;
- list_add(&res->d_alias, &inode->i_dentry);
- hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
- spin_unlock(&res->d_lock);
- }
inode = NULL; /* don't drop reference */
}
spin_unlock(&dcache_lock);
* DCACHE_DISCONNECTED), then d_move that in place of the given dentry
* and return it, else simply d_add the inode to the dentry and return NULL.
*
- * This is (will be) needed in the lookup routine of any filesystem that is exportable
+ * This is needed in the lookup routine of any filesystem that is exportable
* (via knfsd) so that we can build dcache paths to directories effectively.
*
* If a dentry was found and moved, then it is returned. Otherwise NULL
if (inode && S_ISDIR(inode->i_mode)) {
spin_lock(&dcache_lock);
- if (!list_empty(&inode->i_dentry)) {
- new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- __dget_locked(new);
+ new = __d_find_alias(inode, 1);
+ if (new) {
+ BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+ fsnotify_d_instantiate(new, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
/* d_instantiate takes dcache_lock, so we do it by hand */
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
+ fsnotify_d_instantiate(dentry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
struct hlist_head *head = d_hash(parent,hash);
struct dentry *found = NULL;
struct hlist_node *node;
+ struct dentry *dentry;
rcu_read_lock();
- hlist_for_each (node, head) {
- struct dentry *dentry;
+ hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
struct qstr *qstr;
- smp_read_barrier_depends();
- dentry = hlist_entry(node, struct dentry, d_hash);
-
- smp_rmb();
-
if (dentry->d_name.hash != hash)
continue;
if (dentry->d_parent != parent)
spin_lock(&dentry->d_lock);
- /*
- * If lookup ends up in a different bucket due to concurrent
- * rename, fail it
- */
- if (unlikely(dentry->d_bucket != head))
- goto terminate;
-
/*
* Recheck the dentry after taking the lock - d_move may have
* changed things. Don't bother checking the hash because we're
if (dentry->d_parent != parent)
goto next;
+ /*
+ * It is safe to compare names since d_move() cannot
+ * change the qstr (protected by d_lock).
+ */
qstr = &dentry->d_name;
- smp_read_barrier_depends();
if (parent->d_op && parent->d_op->d_compare) {
if (parent->d_op->d_compare(parent, qstr, name))
goto next;
if (!d_unhashed(dentry)) {
atomic_inc(&dentry->d_count);
+ vx_dentry_inc(dentry);
found = dentry;
}
-terminate:
spin_unlock(&dentry->d_lock);
break;
next:
return found;
}
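+/*
+ * Editorial note: hlist_for_each_entry_rcu() supplies the dependency
+ * barrier that the deleted smp_read_barrier_depends()/smp_rmb() pair
+ * provided by hand, which is why those calls can simply go away here.
+ */
+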
+/**
+ * d_hash_and_lookup - hash the qstr then search for a dentry
+ * @dir: Directory to search in
+ * @name: qstr of name we wish to find
+ *
+ * On hash failure or on lookup failure NULL is returned.
+ */
+struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
+{
+ struct dentry *dentry = NULL;
+
+ /*
+ * Check for a fs-specific hash function. Note that we must
+ * calculate the standard hash first, as the d_op->d_hash()
+ * routine may choose to leave the hash value unchanged.
+ */
+ name->hash = full_name_hash(name->name, name->len);
+ if (dir->d_op && dir->d_op->d_hash) {
+ if (dir->d_op->d_hash(dir, name) < 0)
+ goto out;
+ }
+ dentry = d_lookup(dir, name);
+out:
+ return dentry;
+}
+
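+/*
+ * Note: find_inode_number() further down is rewritten as a thin wrapper
+ * around this helper, so the hash-then-lookup sequence now lives in one
+ * place.
+ */
+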
/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
spin_lock(&dcache_lock);
base = d_hash(dparent, dentry->d_name.hash);
hlist_for_each(lhp,base) {
- /* read_barrier_depends() not required for d_hash list
+ /* hlist_for_each_entry_rcu() not required for d_hash list
* as it is parsed under dcache_lock
*/
if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
void d_delete(struct dentry * dentry)
{
+ int isdir = 0;
/*
* Are we the only user?
*/
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
+ isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) {
dentry_iput(dentry);
+ fsnotify_nameremove(dentry, isdir);
+
+ /* remove this and other inotify debug checks after 2.6.18 */
+ dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
return;
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
+ fsnotify_nameremove(dentry, isdir);
+}
+
+static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+{
+
+ entry->d_flags &= ~DCACHE_UNHASHED;
+ hlist_add_head_rcu(&entry->d_hash, list);
+}
+
+static void _d_rehash(struct dentry * entry)
+{
+ __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
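+/*
+ * The split is deliberate: __d_rehash() takes an explicit bucket so that
+ * d_move_locked() below can hash the dentry into the bucket computed for
+ * its new (target) name, while _d_rehash() recomputes the bucket from
+ * the dentry's own parent and name for the common d_rehash() case.
+ */
+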
/**
void d_rehash(struct dentry * entry)
{
- struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
- entry->d_flags &= ~DCACHE_UNHASHED;
+ _d_rehash(entry);
spin_unlock(&entry->d_lock);
- entry->d_bucket = list;
- hlist_add_head_rcu(&entry->d_hash, list);
spin_unlock(&dcache_lock);
}
* deleted it.
*/
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way.
*/
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
+ struct hlist_head *list;
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- spin_lock(&dcache_lock);
write_seqlock(&rename_lock);
/*
* XXXX: do we really need to take target->d_lock?
*/
if (target < dentry) {
spin_lock(&target->d_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
} else {
spin_lock(&dentry->d_lock);
- spin_lock(&target->d_lock);
+ spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
}
/* Move the dentry to the target hash queue */
if (dentry->d_flags & DCACHE_UNHASHED)
goto already_unhashed;
- if (dentry->d_bucket != target->d_bucket) {
- hlist_del_rcu(&dentry->d_hash);
+
+ hlist_del_rcu(&dentry->d_hash);
+
already_unhashed:
- dentry->d_bucket = target->d_bucket;
- hlist_add_head_rcu(&dentry->d_hash, target->d_bucket);
- dentry->d_flags &= ~DCACHE_UNHASHED;
- }
+ list = d_hash(target->d_parent, target->d_name.hash);
+ __d_rehash(dentry, list);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
- list_del(&dentry->d_child);
- list_del(&target->d_child);
+ list_del(&dentry->d_u.d_child);
+ list_del(&target->d_u.d_child);
/* Switch the names.. */
switch_names(dentry, target);
- smp_wmb();
do_switch(dentry->d_name.len, target->d_name.len);
do_switch(dentry->d_name.hash, target->d_name.hash);
if (IS_ROOT(dentry)) {
dentry->d_parent = target->d_parent;
target->d_parent = target;
- INIT_LIST_HEAD(&target->d_child);
+ INIT_LIST_HEAD(&target->d_u.d_child);
} else {
do_switch(dentry->d_parent, target->d_parent);
/* And add them back to the (new) parent lists */
- list_add(&target->d_child, &target->d_parent->d_subdirs);
+ list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
}
- list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
spin_unlock(&target->d_lock);
+ fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+ spin_lock(&dcache_lock);
+ d_move_locked(dentry, target);
spin_unlock(&dcache_lock);
}
+/*
+ * Helper that returns 1 if p1 is a parent of p2, else 0
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+ struct dentry *p;
+
+ for (p = p2; p->d_parent != p; p = p->d_parent) {
+ if (p->d_parent == p1)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+ struct mutex *m1 = NULL, *m2 = NULL;
+ struct dentry *ret;
+
+ /* If alias and dentry share a parent, then no extra locks required */
+ if (alias->d_parent == dentry->d_parent)
+ goto out_unalias;
+
+ /* Check for loops */
+ ret = ERR_PTR(-ELOOP);
+ if (d_isparent(alias, dentry))
+ goto out_err;
+
+ /* See lock_rename() */
+ ret = ERR_PTR(-EBUSY);
+ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+ goto out_err;
+ m1 = &dentry->d_sb->s_vfs_rename_mutex;
+ if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ goto out_err;
+ m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+ d_move_locked(alias, dentry);
+ ret = alias;
+out_err:
+ spin_unlock(&dcache_lock);
+ if (m2)
+ mutex_unlock(m2);
+ if (m1)
+ mutex_unlock(m1);
+ return ret;
+}
+
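+/*
+ * Note on the trylocks above: __d_unalias() mirrors the lock order of
+ * lock_rename() but cannot sleep with dcache_lock held, so lock
+ * contention is reported back to the caller as -EBUSY instead of being
+ * waited out.
+ */
+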
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+ struct dentry *dparent, *aparent;
+
+ switch_names(dentry, anon);
+ do_switch(dentry->d_name.len, anon->d_name.len);
+ do_switch(dentry->d_name.hash, anon->d_name.hash);
+
+ dparent = dentry->d_parent;
+ aparent = anon->d_parent;
+
+ dentry->d_parent = (aparent == anon) ? dentry : aparent;
+ list_del(&dentry->d_u.d_child);
+ if (!IS_ROOT(dentry))
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+ anon->d_parent = (dparent == dentry) ? anon : dparent;
+ list_del(&anon->d_u.d_child);
+ if (!IS_ROOT(anon))
+ list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&anon->d_u.d_child);
+
+ anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
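+/*
+ * Like d_move_locked() above, this switches names and reparents via
+ * switch_names()/do_switch(); unlike d_move_locked(), it also clears
+ * DCACHE_DISCONNECTED on the anonymous dentry, which now has a real
+ * place in the tree.
+ */
+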
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces a dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+ struct dentry *actual;
+
+ BUG_ON(!d_unhashed(dentry));
+
+ spin_lock(&dcache_lock);
+
+ if (!inode) {
+ actual = dentry;
+ dentry->d_inode = NULL;
+ goto found_lock;
+ }
+
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *alias;
+
+ /* Does an aliased dentry already exist? */
+ alias = __d_find_alias(inode, 0);
+ if (alias) {
+ actual = alias;
+ /* Is this an anonymous mountpoint that we could splice
+ * into our tree? */
+ if (IS_ROOT(alias)) {
+ spin_lock(&alias->d_lock);
+ __d_materialise_dentry(dentry, alias);
+ __d_drop(alias);
+ goto found;
+ }
+ /* Nope, but we must(!) avoid directory aliasing */
+ actual = __d_unalias(dentry, alias);
+ if (IS_ERR(actual))
+ dput(alias);
+ goto out_nolock;
+ }
+ }
+
+ /* Add a unique reference */
+ actual = __d_instantiate_unique(dentry, inode);
+ if (!actual)
+ actual = dentry;
+ else if (unlikely(!d_unhashed(actual)))
+ goto shouldnt_be_hashed;
+
+found_lock:
+ spin_lock(&actual->d_lock);
+found:
+ _d_rehash(actual);
+ spin_unlock(&actual->d_lock);
+ spin_unlock(&dcache_lock);
+out_nolock:
+ if (actual == dentry) {
+ security_d_instantiate(dentry, inode);
+ return NULL;
+ }
+
+ iput(inode);
+ return actual;
+
+shouldnt_be_hashed:
+ spin_unlock(&dcache_lock);
+ BUG();
+ goto shouldnt_be_hashed;
+}
+
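+/*
+ * Usage sketch (hedged: the EXPORT_SYMBOL_GPL below and the splicing of
+ * disconnected aliases suggest network-filesystem ->lookup() methods as
+ * the intended callers):
+ *
+ *	struct dentry *res = d_materialise_unique(dentry, inode);
+ *	if (IS_ERR(res))
+ *		return res;	(e.g. -ELOOP or -EBUSY from __d_unalias())
+ *	if (res)
+ *		dentry = res;	(an existing alias was spliced in instead)
+ */
+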
/**
* d_path - return the path of a dentry
* @dentry: dentry to report
struct dentry * saved = new_dentry;
unsigned long seq;
- result = 0;
/* need rcu_read_lock() to protect against d_parent changing under us due to
* d_move
*/
do {
/* for restarting inner loop in case of seq retry */
new_dentry = saved;
+ result = 0;
seq = read_seqbegin(&rename_lock);
for (;;) {
if (new_dentry != old_dentry) {
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
if (d_unhashed(dentry)||!dentry->d_inode)
continue;
atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
atomic_dec(&this_parent->d_count);
this_parent = this_parent->d_parent;
goto resume;
struct dentry * dentry;
ino_t ino = 0;
- /*
- * Check for a fs-specific hash function. Note that we must
- * calculate the standard hash first, as the d_op->d_hash()
- * routine may choose to leave the hash value unchanged.
- */
- name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash)
- {
- if (dir->d_op->d_hash(dir, name) != 0)
- goto out;
- }
-
- dentry = d_lookup(dir, name);
- if (dentry)
- {
+ dentry = d_hash_and_lookup(dir, name);
+ if (dentry) {
if (dentry->d_inode)
ino = dentry->d_inode->i_ino;
dput(dentry);
}
-out:
return ino;
}
{
int loop;
+ /* If hashes are distributed across NUMA nodes, defer
+ * hash allocation until vmalloc space is available.
+ */
+ if (hashdist)
+ return;
+
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
sizeof(struct hlist_head),
dhash_entries,
13,
- 0,
+ HASH_EARLY,
&d_hash_shift,
- &d_hash_mask);
+ &d_hash_mask,
+ 0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
static void __init dcache_init(unsigned long mempages)
{
+ int loop;
+
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
dentry_cache = kmem_cache_create("dentry_cache",
sizeof(struct dentry),
0,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+ SLAB_MEM_SPREAD),
NULL, NULL);
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+
+ /* Hash may have been set up in dcache_init_early */
+ if (!hashdist)
+ return;
+
+ dentry_hashtable =
+ alloc_large_system_hash("Dentry cache",
+ sizeof(struct hlist_head),
+ dhash_entries,
+ 13,
+ 0,
+ &d_hash_shift,
+ &d_hash_mask,
+ 0);
+
+ for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
/* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep;
+struct kmem_cache *names_cachep __read_mostly;
/* SLAB cache for file structures */
-kmem_cache_t *filp_cachep;
+struct kmem_cache *filp_cachep __read_mostly;
EXPORT_SYMBOL(d_genocide);
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
void __init vfs_caches_init_early(void)
{
dcache_init_early();
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
dcache_init(mempages);
inode_init(mempages);
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
-EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(shrink_dcache_anon);
EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_sb);