X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fdcache.c;h=f938ad50ffc6178e66d25a693b0617fdfd341e3d;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=d4fa197bd613a651e8752542d5760dda571d413f;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/dcache.c b/fs/dcache.c index d4fa197bd..f938ad50f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -29,10 +30,12 @@ #include #include #include +#include -#define DCACHE_PARANOIA 1 /* #define DCACHE_DEBUG 1 */ +int sysctl_vfs_cache_pressure = 100; + spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; @@ -40,6 +43,8 @@ EXPORT_SYMBOL(dcache_lock); static kmem_cache_t *dentry_cache; +#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) + /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -61,13 +66,12 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static void d_callback(void *arg) +static void d_callback(struct rcu_head *head) { - struct dentry * dentry = (struct dentry *)arg; + struct dentry * dentry = container_of(head, struct dentry, d_rcu); - if (dname_external(dentry)) { - kfree(dentry->d_qstr); - } + if (dname_external(dentry)) + kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); } @@ -79,7 +83,7 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - call_rcu(&dentry->d_rcu, d_callback, dentry); + call_rcu(&dentry->d_rcu, d_callback); } /* @@ -140,6 +144,8 @@ void dput(struct dentry *dentry) return; repeat: + if (atomic_read(&dentry->d_count) == 1) + might_sleep(); if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) return; @@ -161,7 +167,7 @@ repeat: if (d_unhashed(dentry)) goto kill_it; if (list_empty(&dentry->d_lru)) { - dentry->d_vfs_flags |= DCACHE_REFERENCED; + dentry->d_flags |= DCACHE_REFERENCED; list_add(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; } @@ -257,7 +263,7 @@ int d_invalidate(struct dentry * dentry) static inline struct dentry * __dget_locked(struct dentry *dentry) { atomic_inc(&dentry->d_count); - if (atomic_read(&dentry->d_count) == 1) { + if (!list_empty(&dentry->d_lru)) { dentry_stat.nr_unused--; list_del_init(&dentry->d_lru); } @@ -282,12 +288,11 @@ struct dentry * dget_locked(struct dentry *dentry) * any other hashed alias over that one. */ -struct dentry * d_find_alias(struct inode *inode) +static struct dentry * __d_find_alias(struct inode *inode, int want_discon) { struct list_head *head, *next, *tmp; struct dentry *alias, *discon_alias=NULL; - spin_lock(&dcache_lock); head = &inode->i_dentry; next = inode->i_dentry.next; while (next != head) { @@ -298,19 +303,26 @@ struct dentry * d_find_alias(struct inode *inode) if (!d_unhashed(alias)) { if (alias->d_flags & DCACHE_DISCONNECTED) discon_alias = alias; - else { + else if (!want_discon) { __dget_locked(alias); - spin_unlock(&dcache_lock); return alias; } } } if (discon_alias) __dget_locked(discon_alias); - spin_unlock(&dcache_lock); return discon_alias; } +struct dentry * d_find_alias(struct inode *inode) +{ + struct dentry *de; + spin_lock(&dcache_lock); + de = __d_find_alias(inode, 0); + spin_unlock(&dcache_lock); + return de; +} + /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. @@ -394,8 +406,8 @@ static void prune_dcache(int count) continue; } /* If the dentry was recently referenced, don't free it. */ - if (dentry->d_vfs_flags & DCACHE_REFERENCED) { - dentry->d_vfs_flags &= ~DCACHE_REFERENCED; + if (dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; list_add(&dentry->d_lru, &dentry_unused); dentry_stat.nr_unused++; spin_unlock(&dentry->d_lock); @@ -609,7 +621,7 @@ void shrink_dcache_parent(struct dentry * parent) * * Prune the dentries that are anonymous * - * parsing d_hash list does not read_barrier_depends() as it + * parsing d_hash list does not hlist_for_each_rcu() as it * done under dcache_lock. * */ @@ -624,7 +636,7 @@ void shrink_dcache_anon(struct hlist_head *head) struct dentry *this = hlist_entry(lp, struct dentry, d_hash); if (!list_empty(&this->d_lru)) { dentry_stat.nr_unused--; - list_del(&this->d_lru); + list_del_init(&this->d_lru); } /* @@ -643,30 +655,27 @@ void shrink_dcache_anon(struct hlist_head *head) } /* - * This is called from kswapd when we think we need some more memory. + * Scan `nr' dentries and return the number which remain. + * + * We need to avoid reentering the filesystem if the caller is performing a + * GFP_NOFS allocation attempt. One example deadlock is: + * + * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> + * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> + * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. + * + * In this case we return -1 to tell the caller that we baled. */ static int shrink_dcache_memory(int nr, unsigned int gfp_mask) { if (nr) { - /* - * Nasty deadlock avoidance. - * - * ext2_new_block->getblk->GFP->shrink_dcache_memory-> - * prune_dcache->prune_one_dentry->dput->dentry_iput->iput-> - * inode->i_sb->s_op->put_inode->ext2_discard_prealloc-> - * ext2_free_blocks->lock_super->DEADLOCK. - * - * We should make sure we don't hold the superblock lock over - * block allocations, but for now: - */ - if (gfp_mask & __GFP_FS) - prune_dcache(nr); + if (!(gfp_mask & __GFP_FS)) + return -1; + prune_dcache(nr); } - return dentry_stat.nr_unused; + return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } -#define NAME_ALLOC_LEN(len) ((len+16) & ~15) - /** * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate @@ -677,47 +686,37 @@ static int shrink_dcache_memory(int nr, unsigned int gfp_mask) * copied and the copy passed in may be reused after this call. */ -struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) +struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) { - char * str; struct dentry *dentry; - struct qstr * qstr; + char *dname; dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; if (name->len > DNAME_INLINE_LEN-1) { - qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len), - GFP_KERNEL); - if (!qstr) { + dname = kmalloc(name->len + 1, GFP_KERNEL); + if (!dname) { kmem_cache_free(dentry_cache, dentry); return NULL; } - qstr->name = qstr->name_str; - qstr->len = name->len; - qstr->hash = name->hash; - dentry->d_qstr = qstr; - str = qstr->name_str; } else { - dentry->d_qstr = &dentry->d_name; - str = dentry->d_iname; + dname = dentry->d_iname; } + dentry->d_name.name = dname; - memcpy(str, name->name, name->len); - str[name->len] = 0; + dentry->d_name.len = name->len; + dentry->d_name.hash = name->hash; + memcpy(dname, name->name, name->len); + dname[name->len] = 0; atomic_set(&dentry->d_count, 1); - dentry->d_vfs_flags = DCACHE_UNHASHED; + dentry->d_flags = DCACHE_UNHASHED; dentry->d_lock = SPIN_LOCK_UNLOCKED; - dentry->d_flags = 0; dentry->d_inode = NULL; dentry->d_parent = NULL; - dentry->d_move_count = 0; dentry->d_sb = NULL; - dentry->d_name.name = str; - dentry->d_name.len = name->len; - dentry->d_name.hash = name->hash; dentry->d_op = NULL; dentry->d_fsdata = NULL; dentry->d_mounted = 0; @@ -784,7 +783,8 @@ struct dentry * d_alloc_root(struct inode * root_inode) struct dentry *res = NULL; if (root_inode) { - static const struct qstr name = { .name = "/", .len = 1, .hash = 0 }; + static const struct qstr name = { .name = "/", .len = 1 }; + res = d_alloc(NULL, &name); if (res) { res->d_sb = root_inode->i_sb; @@ -795,10 +795,11 @@ struct dentry * d_alloc_root(struct inode * root_inode) return res; } -static inline struct hlist_head * d_hash(struct dentry * parent, unsigned long hash) +static inline struct hlist_head *d_hash(struct dentry *parent, + unsigned long hash) { - hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash ^ (hash >> D_HASHBITS); + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); return dentry_hashtable + (hash & D_HASHMASK); } @@ -824,7 +825,7 @@ static inline struct hlist_head * d_hash(struct dentry * parent, unsigned long h struct dentry * d_alloc_anon(struct inode *inode) { - static const struct qstr anonstring = { "", 0, 0}; + static const struct qstr anonstring = { .name = "" }; struct dentry *tmp; struct dentry *res; @@ -840,28 +841,27 @@ struct dentry * d_alloc_anon(struct inode *inode) tmp->d_parent = tmp; /* make sure dput doesn't croak */ spin_lock(&dcache_lock); - if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { - /* A directory can only have one dentry. - * This (now) has one, so use it. - */ - res = list_entry(inode->i_dentry.next, struct dentry, d_alias); - __dget_locked(res); - } else { + res = __d_find_alias(inode, 0); + if (!res) { /* attach a disconnected dentry */ res = tmp; tmp = NULL; - if (res) { - spin_lock(&res->d_lock); - res->d_sb = inode->i_sb; - res->d_parent = res; - res->d_inode = inode; - res->d_bucket = d_hash(res, res->d_name.hash); - res->d_flags |= DCACHE_DISCONNECTED; - res->d_vfs_flags &= ~DCACHE_UNHASHED; - list_add(&res->d_alias, &inode->i_dentry); - hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); - spin_unlock(&res->d_lock); - } + spin_lock(&res->d_lock); + res->d_sb = inode->i_sb; + res->d_parent = res; + res->d_inode = inode; + + /* + * Set d_bucket to an "impossible" bucket address so + * that d_move() doesn't get a false positive + */ + res->d_bucket = NULL; + res->d_flags |= DCACHE_DISCONNECTED; + res->d_flags &= ~DCACHE_UNHASHED; + list_add(&res->d_alias, &inode->i_dentry); + hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); + spin_unlock(&res->d_lock); + inode = NULL; /* don't drop reference */ } spin_unlock(&dcache_lock); @@ -883,7 +883,7 @@ struct dentry * d_alloc_anon(struct inode *inode) * DCACHE_DISCONNECTED), then d_move that in place of the given dentry * and return it, else simply d_add the inode to the dentry and return NULL. * - * This is (will be) needed in the lookup routine of any filesystem that is exportable + * This is needed in the lookup routine of any filesystem that is exportable * (via knfsd) so that we can build dcache paths to directories effectively. * * If a dentry was found and moved, then it is returned. Otherwise NULL @@ -894,11 +894,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { struct dentry *new = NULL; - if (inode && S_ISDIR(inode->i_mode)) { + if (inode) { spin_lock(&dcache_lock); - if (!list_empty(&inode->i_dentry)) { - new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - __dget_locked(new); + new = __d_find_alias(inode, 1); + if (new) { + BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_rehash(dentry); @@ -930,8 +930,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) * * __d_lookup is dcache_lock free. The hash list is protected using RCU. * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_move_count is - * used. + * To avoid races with d_move while rename is happening, d_lock is used. * * Overflows in memcmp(), while d_move, are avoided by keeping the length * and name pointer in one structure pointed by d_qstr. @@ -940,8 +939,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) * lookup is going on. * * dentry_unused list is not updated even if lookup finds the required dentry - * in there. It is updated in places such as prune_dcache, shrink_dcache_sb and - * select_parent. This laziness saves lookup from dcache_lock acquisition. + * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, + * select_parent and __dget_locked. This laziness saves lookup from dcache_lock + * acquisition. * * d_lookup() is protected against the concurrent renames in some unrelated * directory using the seqlockt_t rename_lock. @@ -972,25 +972,12 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) rcu_read_lock(); - hlist_for_each (node, head) { + hlist_for_each_rcu(node, head) { struct dentry *dentry; - unsigned long move_count; - struct qstr * qstr; + struct qstr *qstr; - smp_read_barrier_depends(); dentry = hlist_entry(node, struct dentry, d_hash); - /* if lookup ends up in a different bucket - * due to concurrent rename, fail it - */ - if (unlikely(dentry->d_bucket != head)) - break; - - /* - * We must take a snapshot of d_move_count followed by - * read memory barrier before any search key comparison - */ - move_count = dentry->d_move_count; smp_rmb(); if (dentry->d_name.hash != hash) @@ -998,29 +985,43 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) if (dentry->d_parent != parent) continue; - qstr = dentry->d_qstr; - smp_read_barrier_depends(); + spin_lock(&dentry->d_lock); + + /* + * If lookup ends up in a different bucket due to concurrent + * rename, fail it + */ + if (unlikely(dentry->d_bucket != head)) + goto terminate; + + /* + * Recheck the dentry after taking the lock - d_move may have + * changed things. Don't bother checking the hash because we're + * about to compare the whole name anyway. + */ + if (dentry->d_parent != parent) + goto next; + + qstr = rcu_dereference(&dentry->d_name); if (parent->d_op && parent->d_op->d_compare) { if (parent->d_op->d_compare(parent, qstr, name)) - continue; + goto next; } else { if (qstr->len != len) - continue; + goto next; if (memcmp(qstr->name, str, len)) - continue; + goto next; } - spin_lock(&dentry->d_lock); - /* - * If dentry is moved, fail the lookup - */ - if (likely(move_count == dentry->d_move_count)) { - if (!d_unhashed(dentry)) { - atomic_inc(&dentry->d_count); - found = dentry; - } + + if (!d_unhashed(dentry)) { + atomic_inc(&dentry->d_count); + found = dentry; } +terminate: spin_unlock(&dentry->d_lock); break; +next: + spin_unlock(&dentry->d_lock); } rcu_read_unlock(); @@ -1054,7 +1055,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) spin_lock(&dcache_lock); base = d_hash(dparent, dentry->d_name.hash); hlist_for_each(lhp,base) { - /* read_barrier_depends() not required for d_hash list + /* hlist_for_each_rcu() not required for d_hash list * as it is parsed under dcache_lock */ if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { @@ -1118,8 +1119,11 @@ void d_delete(struct dentry * dentry) void d_rehash(struct dentry * entry) { struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); + spin_lock(&dcache_lock); - entry->d_vfs_flags &= ~DCACHE_UNHASHED; + spin_lock(&entry->d_lock); + entry->d_flags &= ~DCACHE_UNHASHED; + spin_unlock(&entry->d_lock); entry->d_bucket = list; hlist_add_head_rcu(&entry->d_hash, list); spin_unlock(&dcache_lock); @@ -1140,28 +1144,40 @@ void d_rehash(struct dentry * entry) * then no longer matches the actual (corrupted) string of the target. * The hash value has to match the hash queue that the dentry is on.. */ -static inline void switch_names(struct dentry * dentry, struct dentry * target) +static void switch_names(struct dentry *dentry, struct dentry *target) { - const unsigned char *old_name, *new_name; - struct qstr *old_qstr, *new_qstr; - - memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); - old_qstr = target->d_qstr; - old_name = target->d_name.name; - new_qstr = dentry->d_qstr; - new_name = dentry->d_name.name; - if (old_name == target->d_iname) { - old_name = dentry->d_iname; - old_qstr = &dentry->d_name; - } - if (new_name == dentry->d_iname) { - new_name = target->d_iname; - new_qstr = &target->d_name; + if (dname_external(target)) { + if (dname_external(dentry)) { + /* + * Both external: swap the pointers + */ + do_switch(target->d_name.name, dentry->d_name.name); + } else { + /* + * dentry:internal, target:external. Steal target's + * storage and make target internal. + */ + dentry->d_name.name = target->d_name.name; + target->d_name.name = target->d_iname; + } + } else { + if (dname_external(dentry)) { + /* + * dentry:external, target:internal. Give dentry's + * storage to target and make dentry internal + */ + memcpy(dentry->d_iname, target->d_name.name, + target->d_name.len + 1); + target->d_name.name = dentry->d_name.name; + dentry->d_name.name = dentry->d_iname; + } else { + /* + * Both are internal. Just copy target to dentry + */ + memcpy(dentry->d_iname, target->d_name.name, + target->d_name.len + 1); + } } - target->d_name.name = new_name; - dentry->d_name.name = old_name; - target->d_qstr = new_qstr; - dentry->d_qstr = old_qstr; } /* @@ -1204,14 +1220,14 @@ void d_move(struct dentry * dentry, struct dentry * target) } /* Move the dentry to the target hash queue, if on different bucket */ - if (dentry->d_vfs_flags & DCACHE_UNHASHED) + if (dentry->d_flags & DCACHE_UNHASHED) goto already_unhashed; if (dentry->d_bucket != target->d_bucket) { hlist_del_rcu(&dentry->d_hash); already_unhashed: dentry->d_bucket = target->d_bucket; hlist_add_head_rcu(&dentry->d_hash, target->d_bucket); - dentry->d_vfs_flags &= ~DCACHE_UNHASHED; + dentry->d_flags &= ~DCACHE_UNHASHED; } /* Unhash the target: dput() will then get rid of it */ @@ -1239,7 +1255,6 @@ already_unhashed: } list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); - dentry->d_move_count++; spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); @@ -1337,6 +1352,7 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *res; struct vfsmount *rootmnt; struct dentry *root; + read_lock(¤t->fs->lock); rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); @@ -1547,13 +1563,25 @@ static int __init set_dhash_entries(char *str) } __setup("dhash_entries=", set_dhash_entries); -static void __init dcache_init(unsigned long mempages) +static void __init dcache_init_early(void) { - struct hlist_head *d; - unsigned long order; - unsigned int nr_hash; - int i; + int loop; + + dentry_hashtable = + alloc_large_system_hash("Dentry cache", + sizeof(struct hlist_head), + dhash_entries, + 13, + 0, + &d_hash_shift, + &d_hash_mask); + + for (loop = 0; loop < (1 << d_hash_shift); loop++) + INIT_HLIST_HEAD(&dentry_hashtable[loop]); +} +static void __init dcache_init(unsigned long mempages) +{ /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature @@ -1562,51 +1590,10 @@ static void __init dcache_init(unsigned long mempages) dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry), 0, - SLAB_RECLAIM_ACCOUNT, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, NULL, NULL); - if (!dentry_cache) - panic("Cannot create dentry cache"); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); - - if (!dhash_entries) - dhash_entries = PAGE_SHIFT < 13 ? - mempages >> (13 - PAGE_SHIFT) : - mempages << (PAGE_SHIFT - 13); - - dhash_entries *= sizeof(struct hlist_head); - for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++) - ; - - do { - unsigned long tmp; - - nr_hash = (1UL << order) * PAGE_SIZE / - sizeof(struct hlist_head); - d_hash_mask = (nr_hash - 1); - - tmp = nr_hash; - d_hash_shift = 0; - while ((tmp >>= 1UL) != 0UL) - d_hash_shift++; - - dentry_hashtable = (struct hlist_head *) - __get_free_pages(GFP_ATOMIC, order); - } while (dentry_hashtable == NULL && --order >= 0); - - printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n", - nr_hash, order, (PAGE_SIZE << order)); - - if (!dentry_hashtable) - panic("Failed to allocate dcache hash table\n"); - - d = dentry_hashtable; - i = nr_hash; - do { - INIT_HLIST_HEAD(d); - d++; - i--; - } while (i); } /* SLAB cache for __getname() consumers */ @@ -1620,6 +1607,12 @@ EXPORT_SYMBOL(d_genocide); extern void bdev_cache_init(void); extern void chrdev_init(void); +void __init vfs_caches_init_early(void) +{ + dcache_init_early(); + inode_init_early(); +} + void __init vfs_caches_init(unsigned long mempages) { unsigned long reserve; @@ -1627,20 +1620,14 @@ void __init vfs_caches_init(unsigned long mempages) /* Base hash sizes on available memory, with a reserve equal to 150% of current kernel size */ - reserve = (mempages - nr_free_pages()) * 3/2; + reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); mempages -= reserve; - names_cachep = kmem_cache_create("names_cache", - PATH_MAX, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!names_cachep) - panic("Cannot create names SLAB cache"); - - filp_cachep = kmem_cache_create("filp", - sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor); - if(!filp_cachep) - panic("Cannot create filp SLAB cache"); + names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); dcache_init(mempages); inode_init(mempages);