X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fdcache.c;h=cf32eb26f28930562142d625156cd49995e98729;hb=eb643825dab24bf61fe40ea800c5be013315220d;hp=d4fa197bd613a651e8752542d5760dda571d413f;hpb=86090fcac5e27b630656fe3d963a6b80e26dac44;p=linux-2.6.git

diff --git a/fs/dcache.c b/fs/dcache.c
index d4fa197bd..cf32eb26f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -40,6 +41,8 @@ EXPORT_SYMBOL(dcache_lock);
 
 static kmem_cache_t *dentry_cache;
 
+#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
+
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -56,6 +59,9 @@ static unsigned int d_hash_shift;
 static struct hlist_head *dentry_hashtable;
 static LIST_HEAD(dentry_unused);
 
+static void prune_dcache(int count);
+
+
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
@@ -65,9 +71,8 @@ static void d_callback(void *arg)
 {
 	struct dentry * dentry = (struct dentry *)arg;
 
-	if (dname_external(dentry)) {
-		kfree(dentry->d_qstr);
-	}
+	if (dname_external(dentry))
+		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry);
 }
 
@@ -79,6 +84,10 @@ static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
+	if (dentry->d_extra_attributes) {
+		kfree(dentry->d_extra_attributes);
+		dentry->d_extra_attributes = NULL;
+	}
 	call_rcu(&dentry->d_rcu, d_callback, dentry);
 }
 
@@ -161,7 +170,7 @@ repeat:
 	if (d_unhashed(dentry))
 		goto kill_it;
 	if (list_empty(&dentry->d_lru)) {
-		dentry->d_vfs_flags |= DCACHE_REFERENCED;
+		dentry->d_flags |= DCACHE_REFERENCED;
 		list_add(&dentry->d_lru, &dentry_unused);
 		dentry_stat.nr_unused++;
 	}
@@ -257,7 +266,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	if (atomic_read(&dentry->d_count) == 1) {
+	if (!list_empty(&dentry->d_lru)) {
 		dentry_stat.nr_unused--;
 		list_del_init(&dentry->d_lru);
 	}
@@ -394,8 +403,8 @@ static void prune_dcache(int count)
 			continue;
 		}
 		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
-			dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+		if (dentry->d_flags & DCACHE_REFERENCED) {
+			dentry->d_flags &= ~DCACHE_REFERENCED;
 			list_add(&dentry->d_lru, &dentry_unused);
 			dentry_stat.nr_unused++;
 			spin_unlock(&dentry->d_lock);
@@ -643,30 +652,27 @@ void shrink_dcache_anon(struct hlist_head *head)
 }
 
 /*
- * This is called from kswapd when we think we need some more memory.
+ * Scan `nr' dentries and return the number which remain.
+ *
+ * We need to avoid reentering the filesystem if the caller is performing a
+ * GFP_NOFS allocation attempt. One example deadlock is:
+ *
+ * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
+ * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
+ * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
+ *
+ * In this case we return -1 to tell the caller that we bailed.
  */
 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
 	if (nr) {
-		/*
-		 * Nasty deadlock avoidance.
-		 *
-		 * ext2_new_block->getblk->GFP->shrink_dcache_memory->
-		 * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
-		 * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
-		 * ext2_free_blocks->lock_super->DEADLOCK.
-		 *
-		 * We should make sure we don't hold the superblock lock over
-		 * block allocations, but for now:
-		 */
-		if (gfp_mask & __GFP_FS)
-			prune_dcache(nr);
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+		prune_dcache(nr);
 	}
 	return dentry_stat.nr_unused;
 }
 
-#define NAME_ALLOC_LEN(len)	((len+16) & ~15)
-
 /**
  * d_alloc	-	allocate a dcache entry
  * @parent: parent of entry to allocate
@@ -677,49 +683,53 @@ static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
  * copied and the copy passed in may be reused after this call.
  */
 
-struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
+struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 {
-	char * str;
 	struct dentry *dentry;
-	struct qstr * qstr;
+	char *dname;
+
+#define DENTRY_UNUSED_THRESHOLD 30000
+#define DENTRY_BATCH_COUNT 32
+
+	if (dentry_stat.nr_unused > DENTRY_UNUSED_THRESHOLD) {
+		int doit = 1;
+		spin_lock(&dcache_lock);
+		if (dentry_stat.nr_unused < DENTRY_UNUSED_THRESHOLD)
+			doit = 0;
+		spin_unlock(&dcache_lock);
+		if (doit)
+			prune_dcache(DENTRY_BATCH_COUNT);
+	}
 
 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
 	if (!dentry)
 		return NULL;
 
 	if (name->len > DNAME_INLINE_LEN-1) {
-		qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len),
-				GFP_KERNEL);
-		if (!qstr) {
+		dname = kmalloc(name->len + 1, GFP_KERNEL);
+		if (!dname) {
 			kmem_cache_free(dentry_cache, dentry);
 			return NULL;
 		}
-		qstr->name = qstr->name_str;
-		qstr->len = name->len;
-		qstr->hash = name->hash;
-		dentry->d_qstr = qstr;
-		str = qstr->name_str;
 	} else {
-		dentry->d_qstr = &dentry->d_name;
-		str = dentry->d_iname;
+		dname = dentry->d_iname;
 	}
+	dentry->d_name.name = dname;
 
-	memcpy(str, name->name, name->len);
-	str[name->len] = 0;
+	dentry->d_name.len = name->len;
+	dentry->d_name.hash = name->hash;
+	memcpy(dname, name->name, name->len);
+	dname[name->len] = 0;
 
 	atomic_set(&dentry->d_count, 1);
-	dentry->d_vfs_flags = DCACHE_UNHASHED;
+	dentry->d_flags = DCACHE_UNHASHED;
 	dentry->d_lock = SPIN_LOCK_UNLOCKED;
-	dentry->d_flags = 0;
 	dentry->d_inode = NULL;
 	dentry->d_parent = NULL;
-	dentry->d_move_count = 0;
 	dentry->d_sb = NULL;
-	dentry->d_name.name = str;
-	dentry->d_name.len = name->len;
-	dentry->d_name.hash = name->hash;
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
+	dentry->d_extra_attributes = NULL;
 	dentry->d_mounted = 0;
 	dentry->d_cookie = NULL;
 	dentry->d_bucket = NULL;
@@ -784,7 +794,8 @@ struct dentry * d_alloc_root(struct inode * root_inode)
 	struct dentry *res = NULL;
 
 	if (root_inode) {
-		static const struct qstr name = { .name = "/", .len = 1, .hash = 0 };
+		static const struct qstr name = { .name = "/", .len = 1 };
+
 		res = d_alloc(NULL, &name);
 		if (res) {
 			res->d_sb = root_inode->i_sb;
@@ -795,10 +806,11 @@ struct dentry * d_alloc_root(struct inode * root_inode)
 	return res;
 }
 
-static inline struct hlist_head * d_hash(struct dentry * parent, unsigned long hash)
+static inline struct hlist_head *d_hash(struct dentry *parent,
+					unsigned long hash)
 {
-	hash += (unsigned long) parent / L1_CACHE_BYTES;
-	hash = hash ^ (hash >> D_HASHBITS);
+	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 	return dentry_hashtable + (hash & D_HASHMASK);
 }
 
@@ -824,7 +836,7 @@ static inline struct hlist_head * d_hash(struct dentry * parent, unsigned long h
 
 struct dentry * d_alloc_anon(struct inode *inode)
 {
-	static const struct qstr anonstring = { "", 0, 0};
+	static const struct qstr anonstring = { .name = "" };
 	struct dentry *tmp;
 	struct dentry *res;
 
@@ -855,9 +867,14 @@ struct dentry * d_alloc_anon(struct inode *inode)
 		res->d_sb = inode->i_sb;
 		res->d_parent = res;
 		res->d_inode = inode;
-		res->d_bucket = d_hash(res, res->d_name.hash);
+
+		/*
+		 * Set d_bucket to an "impossible" bucket address so
+		 * that d_move() doesn't get a false positive
+		 */
+		res->d_bucket = NULL;
 		res->d_flags |= DCACHE_DISCONNECTED;
-		res->d_vfs_flags &= ~DCACHE_UNHASHED;
+		res->d_flags &= ~DCACHE_UNHASHED;
 		list_add(&res->d_alias, &inode->i_dentry);
 		hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
 		spin_unlock(&res->d_lock);
@@ -930,8 +947,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  *
  * __d_lookup is dcache_lock free. The hash list is protected using RCU.
  * Memory barriers are used while updating and doing lockless traversal.
- * To avoid races with d_move while rename is happening, d_move_count is
- * used.
+ * To avoid races with d_move while rename is happening, d_lock is used.
  *
  * Overflows in memcmp(), while d_move, are avoided by keeping the length
  * and name pointer in one structure pointed by d_qstr.
@@ -940,8 +956,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * lookup is going on.
  *
  * dentry_unused list is not updated even if lookup finds the required dentry
- * in there. It is updated in places such as prune_dcache, shrink_dcache_sb and
- * select_parent. This laziness saves lookup from dcache_lock acquisition.
+ * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
+ * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
+ * acquisition.
  *
  * d_lookup() is protected against the concurrent renames in some unrelated
  * directory using the seqlock_t rename_lock.
@@ -974,23 +991,11 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 
 	hlist_for_each (node, head) {
 		struct dentry *dentry;
-		unsigned long move_count;
-		struct qstr * qstr;
+		struct qstr *qstr;
 
 		smp_read_barrier_depends();
 		dentry = hlist_entry(node, struct dentry, d_hash);
-		/* if lookup ends up in a different bucket
-		 * due to concurrent rename, fail it
-		 */
-		if (unlikely(dentry->d_bucket != head))
-			break;
-
-		/*
-		 * We must take a snapshot of d_move_count followed by
-		 * read memory barrier before any search key comparison
-		 */
-		move_count = dentry->d_move_count;
 
 		smp_rmb();
 
 		if (dentry->d_name.hash != hash)
@@ -998,29 +1003,44 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 		if (dentry->d_parent != parent)
 			continue;
 
-		qstr = dentry->d_qstr;
+		spin_lock(&dentry->d_lock);
+
+		/*
+		 * If lookup ends up in a different bucket due to concurrent
+		 * rename, fail it
+		 */
+		if (unlikely(dentry->d_bucket != head))
+			goto terminate;
+
+		/*
+		 * Recheck the dentry after taking the lock - d_move may have
+		 * changed things. Don't bother checking the hash because we're
+		 * about to compare the whole name anyway.
+		 */
+		if (dentry->d_parent != parent)
+			goto next;
+
+		qstr = &dentry->d_name;
 		smp_read_barrier_depends();
 		if (parent->d_op && parent->d_op->d_compare) {
 			if (parent->d_op->d_compare(parent, qstr, name))
-				continue;
+				goto next;
 		} else {
 			if (qstr->len != len)
-				continue;
+				goto next;
 			if (memcmp(qstr->name, str, len))
-				continue;
+				goto next;
 		}
-		spin_lock(&dentry->d_lock);
-		/*
-		 * If dentry is moved, fail the lookup
-		 */
-		if (likely(move_count == dentry->d_move_count)) {
-			if (!d_unhashed(dentry)) {
-				atomic_inc(&dentry->d_count);
-				found = dentry;
-			}
+
+		if (!d_unhashed(dentry)) {
+			atomic_inc(&dentry->d_count);
+			found = dentry;
 		}
+terminate:
 		spin_unlock(&dentry->d_lock);
 		break;
+next:
+		spin_unlock(&dentry->d_lock);
 	}
 	rcu_read_unlock();
 
@@ -1118,8 +1138,11 @@ void d_delete(struct dentry * dentry)
 void d_rehash(struct dentry * entry)
 {
 	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+
 	spin_lock(&dcache_lock);
-	entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+	spin_lock(&entry->d_lock);
+	entry->d_flags &= ~DCACHE_UNHASHED;
+	spin_unlock(&entry->d_lock);
 	entry->d_bucket = list;
 	hlist_add_head_rcu(&entry->d_hash, list);
 	spin_unlock(&dcache_lock);
@@ -1140,28 +1163,40 @@ void d_rehash(struct dentry * entry)
  * then no longer matches the actual (corrupted) string of the target.
  * The hash value has to match the hash queue that the dentry is on..
  */
-static inline void switch_names(struct dentry * dentry, struct dentry * target)
+static void switch_names(struct dentry *dentry, struct dentry *target)
 {
-	const unsigned char *old_name, *new_name;
-	struct qstr *old_qstr, *new_qstr;
-
-	memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
-	old_qstr = target->d_qstr;
-	old_name = target->d_name.name;
-	new_qstr = dentry->d_qstr;
-	new_name = dentry->d_name.name;
-	if (old_name == target->d_iname) {
-		old_name = dentry->d_iname;
-		old_qstr = &dentry->d_name;
-	}
-	if (new_name == dentry->d_iname) {
-		new_name = target->d_iname;
-		new_qstr = &target->d_name;
+	if (dname_external(target)) {
+		if (dname_external(dentry)) {
+			/*
+			 * Both external: swap the pointers
+			 */
+			do_switch(target->d_name.name, dentry->d_name.name);
+		} else {
+			/*
+			 * dentry:internal, target:external. Steal target's
+			 * storage and make target internal.
+			 */
+			dentry->d_name.name = target->d_name.name;
+			target->d_name.name = target->d_iname;
+		}
+	} else {
+		if (dname_external(dentry)) {
+			/*
+			 * dentry:external, target:internal. Give dentry's
+			 * storage to target and make dentry internal
+			 */
+			memcpy(dentry->d_iname, target->d_name.name,
+					target->d_name.len + 1);
+			target->d_name.name = dentry->d_name.name;
+			dentry->d_name.name = dentry->d_iname;
+		} else {
+			/*
+			 * Both are internal. Just copy target to dentry
+			 */
+			memcpy(dentry->d_iname, target->d_name.name,
+					target->d_name.len + 1);
+		}
 	}
-	target->d_name.name = new_name;
-	dentry->d_name.name = old_name;
-	target->d_qstr = new_qstr;
-	dentry->d_qstr = old_qstr;
 }
 
 /*
@@ -1204,19 +1239,29 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
-	if (dentry->d_vfs_flags & DCACHE_UNHASHED)
+	if (dentry->d_flags & DCACHE_UNHASHED)
 		goto already_unhashed;
 	if (dentry->d_bucket != target->d_bucket) {
 		hlist_del_rcu(&dentry->d_hash);
 already_unhashed:
 		dentry->d_bucket = target->d_bucket;
 		hlist_add_head_rcu(&dentry->d_hash, target->d_bucket);
-		dentry->d_vfs_flags &= ~DCACHE_UNHASHED;
+		dentry->d_flags &= ~DCACHE_UNHASHED;
 	}
 
 	/* Unhash the target: dput() will then get rid of it */
 	__d_drop(target);
 
+	/* flush any possible attributes */
+	if (dentry->d_extra_attributes) {
+		kfree(dentry->d_extra_attributes);
+		dentry->d_extra_attributes = NULL;
+	}
+	if (target->d_extra_attributes) {
+		kfree(target->d_extra_attributes);
+		target->d_extra_attributes = NULL;
+	}
+
 	list_del(&dentry->d_child);
 	list_del(&target->d_child);
 
@@ -1239,7 +1284,6 @@ already_unhashed:
 	}
 
 	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
-	dentry->d_move_count++;
 	spin_unlock(&target->d_lock);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&rename_lock);
@@ -1262,7 +1306,7 @@ already_unhashed:
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
+char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
 			struct dentry *root, struct vfsmount *rootmnt,
 			char *buffer, int buflen)
 {
@@ -1330,6 +1374,8 @@ Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+EXPORT_SYMBOL_GPL(__d_path);
+
 /* write full pathname into buffer and return start of pathname */
 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
 				char *buf, int buflen)
@@ -1337,6 +1383,7 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
 	char *res;
 	struct vfsmount *rootmnt;
 	struct dentry *root;
+
 	read_lock(&current->fs->lock);
 	rootmnt = mntget(current->fs->rootmnt);
 	root = dget(current->fs->root);
@@ -1547,6 +1594,23 @@ static int __init set_dhash_entries(char *str)
 }
 __setup("dhash_entries=", set_dhash_entries);
 
+void flush_dentry_attributes (void)
+{
+	struct hlist_node *tmp;
+	struct dentry *dentry;
+	int i;
+
+	spin_lock(&dcache_lock);
+	for (i = 0; i <= d_hash_mask; i++)
+		hlist_for_each_entry(dentry, tmp, dentry_hashtable+i, d_hash) {
+			kfree(dentry->d_extra_attributes);
+			dentry->d_extra_attributes = NULL;
+		}
+	spin_unlock(&dcache_lock);
+}
+
+EXPORT_SYMBOL_GPL(flush_dentry_attributes);
+
 static void __init dcache_init(unsigned long mempages)
 {
 	struct hlist_head *d;
@@ -1562,10 +1626,8 @@ static void __init dcache_init(unsigned long mempages)
 	dentry_cache = kmem_cache_create("dentry_cache",
 					 sizeof(struct dentry),
 					 0,
-					 SLAB_RECLAIM_ACCOUNT,
+					 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
 					 NULL, NULL);
-	if (!dentry_cache)
-		panic("Cannot create dentry cache");
 
 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
 
@@ -1577,6 +1639,9 @@ static void __init dcache_init(unsigned long mempages)
 	dhash_entries *= sizeof(struct hlist_head);
 	for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
 		;
+
+	if (order > 5)
+		order = 5;
 
 	do {
 		unsigned long tmp;
@@ -1627,20 +1692,14 @@ void __init vfs_caches_init(unsigned long mempages)
 
 	/* Base hash sizes on available memory, with a reserve equal to
 	   150% of current kernel size */
 
-	reserve = (mempages - nr_free_pages()) * 3/2;
+	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
 	mempages -= reserve;
 
-	names_cachep = kmem_cache_create("names_cache",
-			PATH_MAX, 0,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!names_cachep)
-		panic("Cannot create names SLAB cache");
-
-	filp_cachep = kmem_cache_create("filp",
-			sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor);
-	if(!filp_cachep)
-		panic("Cannot create filp SLAB cache");
+	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+
+	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 
 	dcache_init(mempages);
 	inode_init(mempages);
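
The d_alloc() and switch_names() changes above all rest on one layout trick: DNAME_INLINE_LEN is whatever is left of struct dentry from d_iname onward, short names are copied into that tail, long names get their own kmalloc() buffer, and dname_external() tells the two cases apart with a pointer comparison. Below is a minimal user-space sketch of the same scheme; the toy_* names and the 36-byte array are illustrative only, not the kernel's API.

/*
 * Toy model of the inline/external dentry-name storage used above.
 * toy_dentry, toy_set_name and TOY_INLINE_LEN are hypothetical names;
 * only the layout trick mirrors the patch.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

struct toy_dentry {
	const char *name;	/* points at iname[] or a malloc'd buffer */
	size_t len;
	char iname[36];		/* inline storage, last member on purpose */
};

/* Same computation as DNAME_INLINE_LEN in the patch: every byte from
 * the start of iname[] to the end of the struct is usable inline. */
#define TOY_INLINE_LEN \
	(sizeof(struct toy_dentry) - offsetof(struct toy_dentry, iname))

/* dname_external() in the kernel is this same pointer comparison. */
static int toy_name_external(const struct toy_dentry *d)
{
	return d->name != d->iname;
}

static int toy_set_name(struct toy_dentry *d, const char *s)
{
	size_t len = strlen(s);
	char *dst;

	if (len > TOY_INLINE_LEN - 1) {
		dst = malloc(len + 1);	/* long name: external buffer */
		if (!dst)
			return -1;
	} else {
		dst = d->iname;		/* short name: store inline */
	}
	memcpy(dst, s, len);
	dst[len] = 0;
	d->name = dst;
	d->len = len;
	return 0;
}

int main(void)
{
	struct toy_dentry d;

	toy_set_name(&d, "vmlinux");
	printf("%s external=%d\n", d.name, toy_name_external(&d));

	toy_set_name(&d, "a-name-much-longer-than-the-inline-array-can-hold");
	printf("%s external=%d\n", d.name, toy_name_external(&d));
	if (toy_name_external(&d))
		free((char *)d.name);	/* only external names are freed */
	return 0;
}

The four branches of the new switch_names() fall directly out of this layout: each of the two dentries is either inline or external, and only the external/external case can be handled by simply swapping the name pointers.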