*/
#include <linux/config.h>
+#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
+#include <linux/bootmem.h>
-#define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */
-spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+int sysctl_vfs_cache_pressure = 100;
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
EXPORT_SYMBOL(dcache_lock);
.age_limit = 45,
};
-static void d_callback(void *arg)
+static void d_callback(struct rcu_head *head)
{
- struct dentry * dentry = (struct dentry *)arg;
+ struct dentry * dentry = container_of(head, struct dentry, d_rcu);
if (dname_external(dentry))
kfree(dentry->d_name.name);
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- call_rcu(&dentry->d_rcu, d_callback, dentry);
+ call_rcu(&dentry->d_rcu, d_callback);
}
/*
return;
repeat:
+ if (atomic_read(&dentry->d_count) == 1)
+ might_sleep();
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;
spin_unlock(&dcache_lock);
return;
}
-
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
+ * @want_discon: flag, used by d_splice_alias, to request
+ * that only a DISCONNECTED alias be returned.
*
- * If inode has a hashed alias - acquire the reference to alias and
- * return it. Otherwise return NULL. Notice that if inode is a directory
- * there can be only one alias and it can be unhashed only if it has
- * no children.
+ * If inode has a hashed alias, or is a directory and has any alias,
+ * acquire the reference to alias and return it. Otherwise return NULL.
+ * Notice that if inode is a directory there can be only one alias and
+ * it can be unhashed only if it has no children, or if it is the root
+ * of a filesystem.
*
* If the inode has a DCACHE_DISCONNECTED alias, then prefer
- * any other hashed alias over that one.
+ * any other hashed alias over that one unless @want_discon is set,
+ * in which case only return a DCACHE_DISCONNECTED alias.
*/
-struct dentry * d_find_alias(struct inode *inode)
+static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
{
struct list_head *head, *next, *tmp;
struct dentry *alias, *discon_alias=NULL;
- spin_lock(&dcache_lock);
head = &inode->i_dentry;
next = inode->i_dentry.next;
while (next != head) {
next = tmp->next;
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
- if (!d_unhashed(alias)) {
+ if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (alias->d_flags & DCACHE_DISCONNECTED)
discon_alias = alias;
- else {
+ else if (!want_discon) {
__dget_locked(alias);
- spin_unlock(&dcache_lock);
return alias;
}
}
}
if (discon_alias)
__dget_locked(discon_alias);
- spin_unlock(&dcache_lock);
return discon_alias;
}
+/*
+ * d_find_alias - locked front end for __d_find_alias()
+ *
+ * Takes dcache_lock, asks __d_find_alias() for an alias of @inode with
+ * want_discon == 0, releases the lock and returns whatever the helper
+ * returned (possibly NULL).
+ */
+struct dentry * d_find_alias(struct inode *inode)
+{
+	struct dentry *found;
+
+	spin_lock(&dcache_lock);
+	found = __d_find_alias(inode, 0);
+	spin_unlock(&dcache_lock);
+
+	return found;
+}
+
/*
* Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
struct dentry *dentry;
struct list_head *tmp;
+ cond_resched_lock(&dcache_lock);
+
tmp = dentry_unused.prev;
if (tmp == &dentry_unused)
break;
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
* searching.
+ *
+ * It returns zero iff there are no unused children,
+ * otherwise it returns the number of children moved to
+ * the end of the unused list. This may not be the total
+ * number of unused children, because select_parent can
+ * drop the lock and return early due to latency
+ * constraints.
*/
static int select_parent(struct dentry * parent)
{
dentry_stat.nr_unused++;
found++;
}
+
+ /*
+ * We can return to the caller if we have found some (this
+ * ensures forward progress). We'll be coming back to find
+ * the rest.
+ */
+ if (found && need_resched())
+ goto out;
+
/*
* Descend a level if the d_subdirs list is non-empty.
*/
#endif
goto resume;
}
+out:
spin_unlock(&dcache_lock);
return found;
}
*
* Prune the dentries that are anonymous
*
- * parsing d_hash list does not read_barrier_depends() as it
+ * parsing d_hash list does not need hlist_for_each_rcu() as it is
* done under dcache_lock.
*
*/
struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
if (!list_empty(&this->d_lru)) {
dentry_stat.nr_unused--;
- list_del(&this->d_lru);
+ list_del_init(&this->d_lru);
}
/*
return -1;
prune_dcache(nr);
}
- return dentry_stat.nr_unused;
+ return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
/**
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
- dentry->d_lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
dentry->d_cookie = NULL;
- dentry->d_bucket = NULL;
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
return dentry;
}
+/*
+ * d_alloc_name - allocate a dentry from a NUL-terminated name
+ *
+ * Builds a temporary qstr describing @name (pointer, strlen() length and
+ * the full_name_hash() of those bytes) and returns the result of
+ * d_alloc() for @parent and that qstr.
+ */
+struct dentry *d_alloc_name(struct dentry *parent, const char *name)
+{
+	struct qstr qname;
+
+	qname.name = name;
+	qname.len = strlen(name);
+	qname.hash = full_name_hash(qname.name, qname.len);
+
+	return d_alloc(parent, &qname);
+}
+
/**
* d_instantiate - fill in inode information for a dentry
* @entry: dentry to complete
security_d_instantiate(entry, inode);
}
+/**
+ * d_instantiate_unique - instantiate a non-aliased dentry
+ * @entry: dentry to instantiate
+ * @inode: inode to attach to this dentry (NULL takes the negative path)
+ *
+ * Attach @inode to @entry unless @inode already has an alias with the same
+ * parent and the same name as @entry.  In that case a reference is taken on
+ * the existing alias (which must be unhashed, else BUG) and it is returned
+ * instead; @entry is left untouched.  Returns NULL when @entry itself was
+ * filled in.
+ *
+ * Note that in order to avoid conflicts with rename() etc, the caller
+ * had better be holding the parent directory semaphore.
+ */
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *alias;
+	const char *name = entry->d_name.name;
+	unsigned int hash = entry->d_name.hash;
+	int len = entry->d_name.len;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+
+	spin_lock(&dcache_lock);
+	if (inode) {
+		list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+			struct qstr *qstr = &alias->d_name;
+
+			/* Cheap comparisons first; memcmp() only runs last. */
+			if (qstr->hash != hash ||
+			    alias->d_parent != entry->d_parent ||
+			    qstr->len != len ||
+			    memcmp(qstr->name, name, len))
+				continue;
+
+			/* Same parent and name: hand back the existing alias. */
+			dget_locked(alias);
+			spin_unlock(&dcache_lock);
+			BUG_ON(!d_unhashed(alias));
+			return alias;
+		}
+		list_add(&entry->d_alias, &inode->i_dentry);
+	}
+	/* Reached for a NULL inode as well as after linking a new alias. */
+	entry->d_inode = inode;
+	spin_unlock(&dcache_lock);
+
+	security_d_instantiate(entry, inode);
+	return NULL;
+}
+EXPORT_SYMBOL(d_instantiate_unique);
+
/**
* d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for
tmp->d_parent = tmp; /* make sure dput doesn't croak */
spin_lock(&dcache_lock);
- if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
- /* A directory can only have one dentry.
- * This (now) has one, so use it.
- */
- res = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- __dget_locked(res);
- } else {
+ res = __d_find_alias(inode, 0);
+ if (!res) {
/* attach a disconnected dentry */
res = tmp;
tmp = NULL;
- if (res) {
- spin_lock(&res->d_lock);
- res->d_sb = inode->i_sb;
- res->d_parent = res;
- res->d_inode = inode;
+ spin_lock(&res->d_lock);
+ res->d_sb = inode->i_sb;
+ res->d_parent = res;
+ res->d_inode = inode;
+ res->d_flags |= DCACHE_DISCONNECTED;
+ res->d_flags &= ~DCACHE_UNHASHED;
+ list_add(&res->d_alias, &inode->i_dentry);
+ hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
+ spin_unlock(&res->d_lock);
- /*
- * Set d_bucket to an "impossible" bucket address so
- * that d_move() doesn't get a false positive
- */
- res->d_bucket = NULL;
- res->d_flags |= DCACHE_DISCONNECTED;
- res->d_flags &= ~DCACHE_UNHASHED;
- list_add(&res->d_alias, &inode->i_dentry);
- hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
- spin_unlock(&res->d_lock);
- }
inode = NULL; /* don't drop reference */
}
spin_unlock(&dcache_lock);
* DCACHE_DISCONNECTED), then d_move that in place of the given dentry
* and return it, else simply d_add the inode to the dentry and return NULL.
*
- * This is (will be) needed in the lookup routine of any filesystem that is exportable
+ * This is needed in the lookup routine of any filesystem that is exportable
* (via knfsd) so that we can build dcache paths to directories effectively.
*
* If a dentry was found and moved, then it is returned. Otherwise NULL
{
struct dentry *new = NULL;
- if (inode && S_ISDIR(inode->i_mode)) {
+ if (inode) {
spin_lock(&dcache_lock);
- if (!list_empty(&inode->i_dentry)) {
- new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- __dget_locked(new);
+ new = __d_find_alias(inode, 1);
+ if (new) {
+ BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
rcu_read_lock();
- hlist_for_each (node, head) {
+ hlist_for_each_rcu(node, head) {
struct dentry *dentry;
struct qstr *qstr;
- smp_read_barrier_depends();
dentry = hlist_entry(node, struct dentry, d_hash);
- smp_rmb();
-
if (dentry->d_name.hash != hash)
continue;
if (dentry->d_parent != parent)
spin_lock(&dentry->d_lock);
- /*
- * If lookup ends up in a different bucket due to concurrent
- * rename, fail it
- */
- if (unlikely(dentry->d_bucket != head))
- goto terminate;
-
/*
* Recheck the dentry after taking the lock - d_move may have
* changed things. Don't bother checking the hash because we're
if (dentry->d_parent != parent)
goto next;
+ /*
+ * It is safe to compare names since d_move() cannot
+ * change the qstr (protected by d_lock).
+ */
qstr = &dentry->d_name;
- smp_read_barrier_depends();
if (parent->d_op && parent->d_op->d_compare) {
if (parent->d_op->d_compare(parent, qstr, name))
goto next;
atomic_inc(&dentry->d_count);
found = dentry;
}
-terminate:
spin_unlock(&dentry->d_lock);
break;
next:
spin_lock(&dcache_lock);
base = d_hash(dparent, dentry->d_name.hash);
hlist_for_each(lhp,base) {
- /* read_barrier_depends() not required for d_hash list
+ /* hlist_for_each_rcu() not required for d_hash list
* as it is parsed under dcache_lock
*/
if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
spin_unlock(&dcache_lock);
}
+static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+{
+ /*
+  * Mark @entry as hashed by clearing DCACHE_UNHASHED, then insert it at
+  * the head of the hash chain @list using the RCU list primitive.
+  * Performs no locking itself: d_rehash() calls it with dcache_lock and
+  * entry->d_lock held.  NOTE(review): locking at the d_move() call site
+  * is not visible in this view - confirm.
+  */
+ entry->d_flags &= ~DCACHE_UNHASHED;
+ hlist_add_head_rcu(&entry->d_hash, list);
+}
+
/**
* d_rehash - add an entry back to the hash
* @entry: dentry to add to the hash
spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
- entry->d_flags &= ~DCACHE_UNHASHED;
+ __d_rehash(entry, list);
spin_unlock(&entry->d_lock);
- entry->d_bucket = list;
- hlist_add_head_rcu(&entry->d_hash, list);
spin_unlock(&dcache_lock);
}
void d_move(struct dentry * dentry, struct dentry * target)
{
+ struct hlist_head *list;
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
/* Move the dentry to the target hash queue, if on different bucket */
if (dentry->d_flags & DCACHE_UNHASHED)
goto already_unhashed;
- if (dentry->d_bucket != target->d_bucket) {
- hlist_del_rcu(&dentry->d_hash);
+
+ hlist_del_rcu(&dentry->d_hash);
+
already_unhashed:
- dentry->d_bucket = target->d_bucket;
- hlist_add_head_rcu(&dentry->d_hash, target->d_bucket);
- dentry->d_flags &= ~DCACHE_UNHASHED;
- }
+ list = d_hash(target->d_parent, target->d_name.hash);
+ __d_rehash(dentry, list);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
/* Switch the names.. */
switch_names(dentry, target);
- smp_wmb();
do_switch(dentry->d_name.len, target->d_name.len);
do_switch(dentry->d_name.hash, target->d_name.hash);
}
__setup("dhash_entries=", set_dhash_entries);
+/*
+ * dcache_init_early - early allocation of the dentry hash table
+ *
+ * Returns without doing anything when hashdist is set.  Otherwise the
+ * table is requested from alloc_large_system_hash() with HASH_EARLY,
+ * passing the addresses of d_hash_shift and d_hash_mask, and every
+ * bucket head is then initialised.
+ */
+static void __init dcache_init_early(void)
+{
+	int bucket;
+
+	/* If hashes are distributed across NUMA nodes, defer
+	 * hash allocation until vmalloc space is available.
+	 */
+	if (hashdist)
+		return;
+
+	dentry_hashtable = alloc_large_system_hash("Dentry cache",
+						   sizeof(struct hlist_head),
+						   dhash_entries,
+						   13,
+						   HASH_EARLY,
+						   &d_hash_shift,
+						   &d_hash_mask,
+						   0);
+
+	/* Start every one of the (1 << d_hash_shift) chains out empty. */
+	for (bucket = 0; bucket < (1 << d_hash_shift); bucket++)
+		INIT_HLIST_HEAD(&dentry_hashtable[bucket]);
+}
+
static void __init dcache_init(unsigned long mempages)
{
- struct hlist_head *d;
- unsigned long order;
- unsigned int nr_hash;
- int i;
+ int loop;
/*
* A constructor could be added for stable state like the lists,
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
- if (!dhash_entries)
- dhash_entries = PAGE_SHIFT < 13 ?
- mempages >> (13 - PAGE_SHIFT) :
- mempages << (PAGE_SHIFT - 13);
-
- dhash_entries *= sizeof(struct hlist_head);
- for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
- ;
-
- do {
- unsigned long tmp;
-
- nr_hash = (1UL << order) * PAGE_SIZE /
- sizeof(struct hlist_head);
- d_hash_mask = (nr_hash - 1);
-
- tmp = nr_hash;
- d_hash_shift = 0;
- while ((tmp >>= 1UL) != 0UL)
- d_hash_shift++;
-
- dentry_hashtable = (struct hlist_head *)
- __get_free_pages(GFP_ATOMIC, order);
- } while (dentry_hashtable == NULL && --order >= 0);
-
- printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
- nr_hash, order, (PAGE_SIZE << order));
-
- if (!dentry_hashtable)
- panic("Failed to allocate dcache hash table\n");
+ /* Hash may have been set up in dcache_init_early */
+ if (!hashdist)
+ return;
- d = dentry_hashtable;
- i = nr_hash;
- do {
- INIT_HLIST_HEAD(d);
- d++;
- i--;
- } while (i);
+ dentry_hashtable =
+ alloc_large_system_hash("Dentry cache",
+ sizeof(struct hlist_head),
+ dhash_entries,
+ 13,
+ 0,
+ &d_hash_shift,
+ &d_hash_mask,
+ 0);
+
+ for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
/* SLAB cache for __getname() consumers */
extern void bdev_cache_init(void);
extern void chrdev_init(void);
+void __init vfs_caches_init_early(void)
+{ /* Early VFS cache setup: runs the dcache and then the inode early-init hooks. */
+ dcache_init_early(); /* dentry hash table; returns without allocating when hashdist is set */
+ inode_init_early(); /* defined outside this view */
+}
+
void __init vfs_caches_init(unsigned long mempages)
{
unsigned long reserve;
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
-EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(shrink_dcache_anon);
EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_sb);