*/
#include <linux/config.h>
+#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
-/* #define DCACHE_DEBUG 1 */
-int sysctl_vfs_cache_pressure = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 100;
+EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
-spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
+static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
EXPORT_SYMBOL(dcache_lock);
-static kmem_cache_t *dentry_cache;
+static kmem_cache_t *dentry_cache __read_mostly;
#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
#define D_HASHBITS d_hash_shift
#define D_HASHMASK d_hash_mask
-static unsigned int d_hash_mask;
-static unsigned int d_hash_shift;
-static struct hlist_head *dentry_hashtable;
+static unsigned int d_hash_mask __read_mostly;
+static unsigned int d_hash_shift __read_mostly;
+static struct hlist_head *dentry_hashtable __read_mostly;
static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
static void d_callback(struct rcu_head *head)
{
- struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+ struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
if (dname_external(dentry))
kfree(dentry->d_name.name);
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- if (dentry->d_extra_attributes) {
- kfree(dentry->d_extra_attributes);
- dentry->d_extra_attributes = NULL;
- }
- call_rcu(&dentry->d_rcu, d_callback);
+ if (dentry->d_extra_attributes) {
+ kfree(dentry->d_extra_attributes);
+ dentry->d_extra_attributes = NULL;
+ }
+ call_rcu(&dentry->d_u.d_rcu, d_callback);
}
/*
* d_iput() operation if defined.
* Called with dcache_lock and per dentry lock held, drops both.
*/
-static inline void dentry_iput(struct dentry * dentry)
+static void dentry_iput(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
if (inode) {
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
spin_unlock(&dcache_lock);
return;
}
-
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
list_del(&dentry->d_lru);
dentry_stat.nr_unused--;
}
- list_del(&dentry->d_child);
+ list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
struct dentry * d_find_alias(struct inode *inode)
{
- struct dentry *de;
- spin_lock(&dcache_lock);
- de = __d_find_alias(inode, 0);
- spin_unlock(&dcache_lock);
+ struct dentry *de = NULL;
+
+ if (!list_empty(&inode->i_dentry)) {
+ spin_lock(&dcache_lock);
+ de = __d_find_alias(inode, 0);
+ spin_unlock(&dcache_lock);
+ }
return de;
}
*/
void d_prune_aliases(struct inode *inode)
{
- struct list_head *tmp, *head = &inode->i_dentry;
+ struct dentry *dentry;
restart:
spin_lock(&dcache_lock);
- tmp = head;
- while ((tmp = tmp->next) != head) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
__dget_locked(dentry);
__d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
dput(dentry);
goto restart;
}
+ spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
}
struct dentry * parent;
__d_drop(dentry);
- list_del(&dentry->d_child);
+ list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
dentry_iput(dentry);
parent = dentry->d_parent;
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
* Pass two ... free the dentries for this superblock.
*/
repeat:
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
continue;
}
prune_one_dentry(dentry);
+ cond_resched_lock(&dcache_lock);
goto repeat;
}
spin_unlock(&dcache_lock);
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
/* Have we found a mount point ? */
if (d_mountpoint(dentry))
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
goto resume;
}
* list for prune_dcache(). We descend to the next level
* whenever the d_subdirs list is non-empty and continue
* searching.
+ *
+ * It returns zero iff there are no unused children,
+ * otherwise it returns the number of children moved to
+ * the end of the unused list. This may not be the total
+ * number of unused children, because select_parent can
+ * drop the lock and return early due to latency
+ * constraints.
*/
static int select_parent(struct dentry * parent)
{
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
if (!list_empty(&dentry->d_lru)) {
dentry_stat.nr_unused++;
found++;
}
+
+ /*
+ * We can return to the caller if we have found some (this
+ * ensures forward progress). We'll be coming back to find
+ * the rest.
+ */
+ if (found && need_resched())
+ goto out;
+
/*
* Descend a level if the d_subdirs list is non-empty.
*/
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, found);
-#endif
goto repeat;
}
}
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
-this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
-#endif
goto resume;
}
+out:
spin_unlock(&dcache_lock);
return found;
}
*
* Prune the dentries that are anonymous
*
- * parsing d_hash list does not hlist_for_each_rcu() as it
+ * parsing d_hash list does not hlist_for_each_entry_rcu() as it
* done under dcache_lock.
*
*/
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
- dentry->d_lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_fsdata = NULL;
dentry->d_extra_attributes = NULL;
dentry->d_mounted = 0;
+#ifdef CONFIG_PROFILING
dentry->d_cookie = NULL;
- dentry->d_bucket = NULL;
+#endif
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
} else {
- INIT_LIST_HEAD(&dentry->d_child);
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
}
spin_lock(&dcache_lock);
if (parent)
- list_add(&dentry->d_child, &parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
dentry_stat.nr_dentry++;
spin_unlock(&dcache_lock);
return dentry;
}
+struct dentry *d_alloc_name(struct dentry *parent, const char *name)
+{
+ struct qstr q;
+
+ q.name = name;
+ q.len = strlen(name);
+ q.hash = full_name_hash(q.name, q.len);
+ return d_alloc(parent, &q);
+}
+
/**
* d_instantiate - fill in inode information for a dentry
* @entry: dentry to complete
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- if (!list_empty(&entry->d_alias)) BUG();
+ BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
entry->d_inode = inode;
+ fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
}
+/**
+ * d_instantiate_unique - instantiate a non-aliased dentry
+ * @entry: dentry to instantiate
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry. On success, it returns NULL.
+ * If an unhashed alias of "entry" already exists, then we return the
+ * aliased dentry instead and drop one reference to inode.
+ *
+ * Note that in order to avoid conflicts with rename() etc, the caller
+ * had better be holding the parent directory semaphore.
+ *
+ * This also assumes that the inode count has been incremented
+ * (or otherwise set) by the caller to indicate that it is now
+ * in use by the dcache.
+ */
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+ struct dentry *alias;
+ int len = entry->d_name.len;
+ const char *name = entry->d_name.name;
+ unsigned int hash = entry->d_name.hash;
+
+ BUG_ON(!list_empty(&entry->d_alias));
+ spin_lock(&dcache_lock);
+ if (!inode)
+ goto do_negative;
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ struct qstr *qstr = &alias->d_name;
+
+ if (qstr->hash != hash)
+ continue;
+ if (alias->d_parent != entry->d_parent)
+ continue;
+ if (qstr->len != len)
+ continue;
+ if (memcmp(qstr->name, name, len))
+ continue;
+ dget_locked(alias);
+ spin_unlock(&dcache_lock);
+ BUG_ON(!d_unhashed(alias));
+ iput(inode);
+ return alias;
+ }
+ list_add(&entry->d_alias, &inode->i_dentry);
+do_negative:
+ entry->d_inode = inode;
+ fsnotify_d_instantiate(entry, inode);
+ spin_unlock(&dcache_lock);
+ security_d_instantiate(entry, inode);
+ return NULL;
+}
+EXPORT_SYMBOL(d_instantiate_unique);
+
/**
* d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for
res->d_sb = inode->i_sb;
res->d_parent = res;
res->d_inode = inode;
-
- /*
- * Set d_bucket to an "impossible" bucket address so
- * that d_move() doesn't get a false positive
- */
- res->d_bucket = NULL;
res->d_flags |= DCACHE_DISCONNECTED;
res->d_flags &= ~DCACHE_UNHASHED;
list_add(&res->d_alias, &inode->i_dentry);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+ fsnotify_d_instantiate(new, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
/* d_instantiate takes dcache_lock, so we do it by hand */
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
+ fsnotify_d_instantiate(dentry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
struct hlist_head *head = d_hash(parent,hash);
struct dentry *found = NULL;
struct hlist_node *node;
+ struct dentry *dentry;
rcu_read_lock();
- hlist_for_each_rcu(node, head) {
- struct dentry *dentry;
+ hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
struct qstr *qstr;
- dentry = hlist_entry(node, struct dentry, d_hash);
-
- smp_rmb();
-
if (dentry->d_name.hash != hash)
continue;
if (dentry->d_parent != parent)
spin_lock(&dentry->d_lock);
- /*
- * If lookup ends up in a different bucket due to concurrent
- * rename, fail it
- */
- if (unlikely(dentry->d_bucket != head))
- goto terminate;
-
/*
* Recheck the dentry after taking the lock - d_move may have
* changed things. Don't bother checking the hash because we're
if (dentry->d_parent != parent)
goto next;
- qstr = rcu_dereference(&dentry->d_name);
+ /*
+ * It is safe to compare names since d_move() cannot
+ * change the qstr (protected by d_lock).
+ */
+ qstr = &dentry->d_name;
if (parent->d_op && parent->d_op->d_compare) {
if (parent->d_op->d_compare(parent, qstr, name))
goto next;
atomic_inc(&dentry->d_count);
found = dentry;
}
-terminate:
spin_unlock(&dentry->d_lock);
break;
next:
return found;
}
+/**
+ * d_hash_and_lookup - hash the qstr then search for a dentry
+ * @dir: Directory to search in
+ * @name: qstr of name we wish to find
+ *
+ * On hash failure or on lookup failure NULL is returned.
+ */
+struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
+{
+ struct dentry *dentry = NULL;
+
+ /*
+ * Check for a fs-specific hash function. Note that we must
+ * calculate the standard hash first, as the d_op->d_hash()
+ * routine may choose to leave the hash value unchanged.
+ */
+ name->hash = full_name_hash(name->name, name->len);
+ if (dir->d_op && dir->d_op->d_hash) {
+ if (dir->d_op->d_hash(dir, name) < 0)
+ goto out;
+ }
+ dentry = d_lookup(dir, name);
+out:
+ return dentry;
+}
+
/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
spin_lock(&dcache_lock);
base = d_hash(dparent, dentry->d_name.hash);
hlist_for_each(lhp,base) {
- /* hlist_for_each_rcu() not required for d_hash list
+ /* hlist_for_each_entry_rcu() not required for d_hash list
* as it is parsed under dcache_lock
*/
if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
void d_delete(struct dentry * dentry)
{
+ int isdir = 0;
/*
* Are we the only user?
*/
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
+ isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) {
dentry_iput(dentry);
+ fsnotify_nameremove(dentry, isdir);
+
+ /* remove this and other inotify debug checks after 2.6.18 */
+ dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
return;
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
+ fsnotify_nameremove(dentry, isdir);
+}
+
+static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+{
+
+ entry->d_flags &= ~DCACHE_UNHASHED;
+ hlist_add_head_rcu(&entry->d_hash, list);
}
/**
spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
- entry->d_flags &= ~DCACHE_UNHASHED;
+ __d_rehash(entry, list);
spin_unlock(&entry->d_lock);
- entry->d_bucket = list;
- hlist_add_head_rcu(&entry->d_hash, list);
spin_unlock(&dcache_lock);
}
void d_move(struct dentry * dentry, struct dentry * target)
{
+ struct hlist_head *list;
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
/* Move the dentry to the target hash queue, if on different bucket */
if (dentry->d_flags & DCACHE_UNHASHED)
goto already_unhashed;
- if (dentry->d_bucket != target->d_bucket) {
- hlist_del_rcu(&dentry->d_hash);
+
+ hlist_del_rcu(&dentry->d_hash);
+
already_unhashed:
- dentry->d_bucket = target->d_bucket;
- hlist_add_head_rcu(&dentry->d_hash, target->d_bucket);
- dentry->d_flags &= ~DCACHE_UNHASHED;
- }
+ list = d_hash(target->d_parent, target->d_name.hash);
+ __d_rehash(dentry, list);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
- /* flush any possible attributes */
- if (dentry->d_extra_attributes) {
- kfree(dentry->d_extra_attributes);
- dentry->d_extra_attributes = NULL;
- }
- if (target->d_extra_attributes) {
- kfree(target->d_extra_attributes);
- target->d_extra_attributes = NULL;
- }
-
- list_del(&dentry->d_child);
- list_del(&target->d_child);
+ /* flush any possible attributes */
+ if (dentry->d_extra_attributes) {
+ kfree(dentry->d_extra_attributes);
+ dentry->d_extra_attributes = NULL;
+ }
+ if (target->d_extra_attributes) {
+ kfree(target->d_extra_attributes);
+ target->d_extra_attributes = NULL;
+ }
+
+ list_del(&dentry->d_u.d_child);
+ list_del(&target->d_u.d_child);
/* Switch the names.. */
switch_names(dentry, target);
- smp_wmb();
do_switch(dentry->d_name.len, target->d_name.len);
do_switch(dentry->d_name.hash, target->d_name.hash);
if (IS_ROOT(dentry)) {
dentry->d_parent = target->d_parent;
target->d_parent = target;
- INIT_LIST_HEAD(&target->d_child);
+ INIT_LIST_HEAD(&target->d_u.d_child);
} else {
do_switch(dentry->d_parent, target->d_parent);
/* And add them back to the (new) parent lists */
- list_add(&target->d_child, &target->d_parent->d_subdirs);
+ list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
}
- list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
spin_unlock(&target->d_lock);
+ fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
spin_unlock(&dcache_lock);
struct dentry * saved = new_dentry;
unsigned long seq;
- result = 0;
/* need rcu_readlock to protect against the d_parent trashing due to
* d_move
*/
do {
/* for restarting inner loop in case of seq retry */
new_dentry = saved;
+ result = 0;
seq = read_seqbegin(&rename_lock);
for (;;) {
if (new_dentry != old_dentry) {
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
if (d_unhashed(dentry)||!dentry->d_inode)
continue;
atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
atomic_dec(&this_parent->d_count);
this_parent = this_parent->d_parent;
goto resume;
struct dentry * dentry;
ino_t ino = 0;
- /*
- * Check for a fs-specific hash function. Note that we must
- * calculate the standard hash first, as the d_op->d_hash()
- * routine may choose to leave the hash value unchanged.
- */
- name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash)
- {
- if (dir->d_op->d_hash(dir, name) != 0)
- goto out;
- }
-
- dentry = d_lookup(dir, name);
- if (dentry)
- {
+ dentry = d_hash_and_lookup(dir, name);
+ if (dentry) {
if (dentry->d_inode)
ino = dentry->d_inode->i_ino;
dput(dentry);
}
-out:
return ino;
}
{
int loop;
+ /* If hashes are distributed across NUMA nodes, defer
+ * hash allocation until vmalloc space is available.
+ */
+ if (hashdist)
+ return;
+
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
sizeof(struct hlist_head),
dhash_entries,
13,
- 0,
+ HASH_EARLY,
&d_hash_shift,
- &d_hash_mask);
+ &d_hash_mask,
+ 0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
static void __init dcache_init(unsigned long mempages)
{
+ int loop;
+
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
dentry_cache = kmem_cache_create("dentry_cache",
sizeof(struct dentry),
0,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+ SLAB_MEM_SPREAD),
NULL, NULL);
set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+
+ /* Hash may have been set up in dcache_init_early */
+ if (!hashdist)
+ return;
+
+ dentry_hashtable =
+ alloc_large_system_hash("Dentry cache",
+ sizeof(struct hlist_head),
+ dhash_entries,
+ 13,
+ 0,
+ &d_hash_shift,
+ &d_hash_mask,
+ 0);
+
+ for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
/* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep;
+kmem_cache_t *names_cachep __read_mostly;
/* SLAB cache for file structures */
-kmem_cache_t *filp_cachep;
+kmem_cache_t *filp_cachep __read_mostly;
EXPORT_SYMBOL(d_genocide);
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
dcache_init(mempages);
inode_init(mempages);